github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/ptrace.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <elf.h>
    16  #include <signal.h>
    17  #include <stddef.h>
    18  #include <sys/prctl.h>
    19  #include <sys/ptrace.h>
    20  #include <sys/socket.h>
    21  #include <sys/time.h>
    22  #include <sys/types.h>
    23  #include <sys/user.h>
    24  #include <sys/wait.h>
    25  #include <unistd.h>
    26  
    27  #include <iostream>
    28  #include <utility>
    29  
    30  #include "gmock/gmock.h"
    31  #include "gtest/gtest.h"
    32  #include "absl/flags/flag.h"
    33  #include "absl/strings/string_view.h"
    34  #include "absl/time/clock.h"
    35  #include "absl/time/time.h"
    36  #include "test/util/capability_util.h"
    37  #include "test/util/fs_util.h"
    38  #include "test/util/logging.h"
    39  #include "test/util/memory_util.h"
    40  #include "test/util/multiprocess_util.h"
    41  #include "test/util/platform_util.h"
    42  #include "test/util/signal_util.h"
    43  #include "test/util/temp_path.h"
    44  #include "test/util/test_util.h"
    45  #include "test/util/thread_util.h"
    46  #include "test/util/time_util.h"
    47  
    48  ABSL_FLAG(bool, ptrace_test_execve_child, false,
    49            "If true, run the "
    50            "PtraceExecveTest_Execve_GetRegs_PeekUser_SIGKILL_TraceClone_"
    51            "TraceExit child workload.");
    52  ABSL_FLAG(bool, ptrace_test_trace_descendants_allowed, false,
    53            "If set, run the child workload for "
    54            "PtraceTest_TraceDescendantsAllowed.");
    55  ABSL_FLAG(bool, ptrace_test_ptrace_attacher, false,
    56            "If set, run the child workload for PtraceAttacherSubprocess.");
    57  ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer, false,
    58            "If set, run the child workload for PrctlSetPtracerSubprocess.");
    59  ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracee_thread, false,
    60            "If set, run the child workload for "
    61            "PtraceTest_PrctlSetPtracerPersistsPastTraceeThreadExit.");
    62  ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exec_non_leader, false,
    63            "If set, run the child workload for "
    64            "PtraceTest_PrctlSetPtracerDoesNotPersistPastNonLeaderExec.");
    65  ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracer_thread, false,
    66            "If set, run the child workload for "
    67            "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit.");
    68  ABSL_FLAG(int, ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid, -1,
    69            "Specifies the tracee tid in the child workload for "
    70            "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit.");
    71  ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id, false,
    72            "If set, run the child workload for PtraceTest_PrctlSetPtracePID.");
    73  ABSL_FLAG(int, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid, -1,
    74            "Specifies the thread tid to be traced in the child workload "
    75            "for PtraceTest_PrctlSetPtracerRespectsTracerThreadID.");
    76  
    77  ABSL_FLAG(bool, ptrace_test_tracee, false,
    78            "If true, run the tracee process for the "
    79            "PrctlSetPtracerDoesNotPersistPastLeaderExec and "
    80            "PrctlSetPtracerDoesNotPersistPastNonLeaderExec workloads.");
    81  ABSL_FLAG(int, ptrace_test_trace_tid, -1,
    82            "If set, run a process to ptrace attach to the thread with the "
    83            "specified pid for the PrctlSetPtracerRespectsTracerThreadID "
    84            "workload.");
    85  ABSL_FLAG(int, ptrace_test_fd, -1,
    86            "Specifies the fd used for communication between tracer and tracee "
    87            "processes across exec.");
    88  
    89  namespace gvisor {
    90  namespace testing {
    91  
    92  namespace {
    93  
    94  // PTRACE_GETSIGMASK and PTRACE_SETSIGMASK are not defined until glibc 2.23
    95  // (fb53a27c5741 "Add new header definitions from Linux 4.4 (plus older ptrace
    96  // definitions)").
    97  constexpr auto kPtraceGetSigMask = static_cast<__ptrace_request>(0x420a);
    98  constexpr auto kPtraceSetSigMask = static_cast<__ptrace_request>(0x420b);
    99  
   100  // PTRACE_SYSEMU is not defined until glibc 2.27 (c48831d0eebf "linux/x86: sync
   101  // sys/ptrace.h with Linux 4.14 [BZ #22433]").
   102  constexpr auto kPtraceSysemu = static_cast<__ptrace_request>(31);
   103  
   104  // PTRACE_EVENT_STOP is not defined until glibc 2.26 (3f67d1a7021e "Add Linux
   105  // PTRACE_EVENT_STOP").
   106  constexpr int kPtraceEventStop = 128;
   107  
   108  // Sends sig to the current process with tgkill(2).
   109  //
   110  // glibc's raise(2) may change the signal mask before sending the signal. These
   111  // extra syscalls make tests of syscall, signal interception, etc. difficult to
   112  // write.
   113  void RaiseSignal(int sig) {
   114    pid_t pid = getpid();
   115    TEST_PCHECK(pid > 0);
   116    pid_t tid = gettid();
   117    TEST_PCHECK(tid > 0);
   118    TEST_PCHECK(tgkill(pid, tid, sig) == 0);
   119  }
   120  
   121  constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
   122  
   123  // Returns the Yama ptrace scope.
   124  PosixErrorOr<int> YamaPtraceScope() {
   125    ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath));
   126    if (!exists) {
   127      // File doesn't exist means no Yama, so the scope is disabled -> 0.
   128      return 0;
   129    }
   130  
   131    std::string contents;
   132    RETURN_IF_ERRNO(GetContents(kYamaPtraceScopePath, &contents));
   133  
   134    int scope;
   135    if (!absl::SimpleAtoi(contents, &scope)) {
   136      return PosixError(EINVAL, absl::StrCat(contents, ": not a valid number"));
   137    }
   138  
   139    return scope;
   140  }
   141  
   142  int CheckPtraceAttach(pid_t pid) {
   143    int ret = ptrace(PTRACE_ATTACH, pid, 0, 0);
   144    MaybeSave();
   145    if (ret < 0) {
   146      return ret;
   147    }
   148  
   149    int status;
   150    TEST_PCHECK(waitpid(pid, &status, 0) == pid);
   151    MaybeSave();
   152    TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
   153    TEST_PCHECK(ptrace(PTRACE_DETACH, pid, 0, 0) == 0);
   154    MaybeSave();
   155    return 0;
   156  }
   157  
   158  class SimpleSubprocess {
   159   public:
   160    explicit SimpleSubprocess(absl::string_view child_flag) {
   161      int sockets[2];
   162      TEST_PCHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == 0);
   163  
   164      // Allocate vector before forking (not async-signal-safe).
   165      ExecveArray const owned_child_argv = {"/proc/self/exe", child_flag,
   166                                            "--ptrace_test_fd",
   167                                            std::to_string(sockets[0])};
   168      char* const* const child_argv = owned_child_argv.get();
   169  
   170      pid_ = fork();
   171      if (pid_ == 0) {
   172        TEST_PCHECK(close(sockets[1]) == 0);
   173        execve(child_argv[0], child_argv, /* envp = */ nullptr);
   174        TEST_PCHECK_MSG(false, "Survived execve to test child");
   175      }
   176      TEST_PCHECK(pid_ > 0);
   177      TEST_PCHECK(close(sockets[0]) == 0);
   178      sockfd_ = sockets[1];
   179    }
   180  
   181    SimpleSubprocess(SimpleSubprocess&& orig)
   182        : pid_(orig.pid_), sockfd_(orig.sockfd_) {
   183      orig.pid_ = -1;
   184      orig.sockfd_ = -1;
   185    }
   186  
   187    SimpleSubprocess& operator=(SimpleSubprocess&& orig) {
   188      if (this != &orig) {
   189        this->~SimpleSubprocess();
   190        pid_ = orig.pid_;
   191        sockfd_ = orig.sockfd_;
   192        orig.pid_ = -1;
   193        orig.sockfd_ = -1;
   194      }
   195      return *this;
   196    }
   197  
   198    SimpleSubprocess(SimpleSubprocess const&) = delete;
   199    SimpleSubprocess& operator=(SimpleSubprocess const&) = delete;
   200  
   201    ~SimpleSubprocess() {
   202      if (pid_ < 0) {
   203        return;
   204      }
   205      EXPECT_THAT(shutdown(sockfd_, SHUT_RDWR), SyscallSucceeds());
   206      EXPECT_THAT(close(sockfd_), SyscallSucceeds());
   207      int status;
   208      EXPECT_THAT(waitpid(pid_, &status, 0), SyscallSucceedsWithValue(pid_));
   209      EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   210          << " status " << status;
   211    }
   212  
   213    pid_t pid() const { return pid_; }
   214  
   215    // Sends the child process the given value, receives an errno in response, and
   216    // returns a PosixError corresponding to the received errno.
   217    template <typename T>
   218    PosixError Cmd(T val) {
   219      if (WriteFd(sockfd_, &val, sizeof(val)) < 0) {
   220        return PosixError(errno, "write failed");
   221      }
   222      return RecvErrno();
   223    }
   224  
   225   private:
   226    PosixError RecvErrno() {
   227      int resp_errno;
   228      if (ReadFd(sockfd_, &resp_errno, sizeof(resp_errno)) < 0) {
   229        return PosixError(errno, "read failed");
   230      }
   231      return PosixError(resp_errno);
   232    }
   233  
   234    pid_t pid_ = -1;
   235    int sockfd_ = -1;
   236  };
   237  
   238  TEST(PtraceTest, AttachSelf) {
   239    EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0),
   240                SyscallFailsWithErrno(EPERM));
   241  }
   242  
   243  TEST(PtraceTest, AttachSameThreadGroup) {
   244    pid_t const tid = gettid();
   245    ScopedThread([&] {
   246      EXPECT_THAT(ptrace(PTRACE_ATTACH, tid, 0, 0), SyscallFailsWithErrno(EPERM));
   247    });
   248  }
   249  
   250  TEST(PtraceTest, TraceParentNotAllowed) {
   251    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1);
   252    AutoCapability cap(CAP_SYS_PTRACE, false);
   253  
   254    pid_t const child_pid = fork();
   255    if (child_pid == 0) {
   256      TEST_CHECK(CheckPtraceAttach(getppid()) == -1);
   257      TEST_PCHECK(errno == EPERM);
   258      _exit(0);
   259    }
   260    ASSERT_THAT(child_pid, SyscallSucceeds());
   261  
   262    int status;
   263    ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
   264    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   265        << " status " << status;
   266  }
   267  
   268  TEST(PtraceTest, TraceNonDescendantNotAllowed) {
   269    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1);
   270    AutoCapability cap(CAP_SYS_PTRACE, false);
   271  
   272    pid_t const tracee_pid = fork();
   273    if (tracee_pid == 0) {
   274      while (true) {
   275        SleepSafe(absl::Seconds(1));
   276      }
   277    }
   278    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   279  
   280    pid_t const tracer_pid = fork();
   281    if (tracer_pid == 0) {
   282      TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);
   283      TEST_PCHECK(errno == EPERM);
   284      _exit(0);
   285    }
   286    EXPECT_THAT(tracer_pid, SyscallSucceeds());
   287  
   288    // Clean up tracer.
   289    int status;
   290    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   291    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0);
   292  
   293    // Clean up tracee.
   294    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   295    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   296                SyscallSucceedsWithValue(tracee_pid));
   297    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   298        << " status " << status;
   299  }
   300  
   301  TEST(PtraceTest, TraceNonDescendantWithCapabilityAllowed) {
   302    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE)));
   303    // Skip if disallowed by YAMA despite having CAP_SYS_PTRACE.
   304    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 2);
   305  
   306    pid_t const tracee_pid = fork();
   307    if (tracee_pid == 0) {
   308      while (true) {
   309        SleepSafe(absl::Seconds(1));
   310      }
   311    }
   312    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   313  
   314    pid_t const tracer_pid = fork();
   315    if (tracer_pid == 0) {
   316      TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
   317      _exit(0);
   318    }
   319    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   320  
   321    // Clean up tracer.
   322    int status;
   323    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   324    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0);
   325  
   326    // Clean up tracee.
   327    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   328    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   329                SyscallSucceedsWithValue(tracee_pid));
   330    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   331        << " status " << status;
   332  }
   333  
   334  TEST(PtraceTest, TraceDescendantsAllowed) {
   335    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 1);
   336    AutoCapability cap(CAP_SYS_PTRACE, false);
   337  
   338    // Use socket pair to communicate tids to this process from its grandchild.
   339    int sockets[2];
   340    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   341  
   342    // Allocate vector before forking (not async-signal-safe).
   343    ExecveArray const owned_child_argv = {
   344        "/proc/self/exe", "--ptrace_test_trace_descendants_allowed",
   345        "--ptrace_test_fd", std::to_string(sockets[0])};
   346    char* const* const child_argv = owned_child_argv.get();
   347  
   348    pid_t const child_pid = fork();
   349    if (child_pid == 0) {
   350      // In child process.
   351      TEST_PCHECK(close(sockets[1]) == 0);
   352      pid_t const grandchild_pid = fork();
   353      if (grandchild_pid == 0) {
   354        // This test will create a new thread in the grandchild process.
   355        // pthread_create(2) isn't async-signal-safe, so we execve() first.
   356        execve(child_argv[0], child_argv, /* envp = */ nullptr);
   357        TEST_PCHECK_MSG(false, "Survived execve to test child");
   358      }
   359      TEST_PCHECK(grandchild_pid > 0);
   360      MaybeSave();
   361  
   362      // Wait for grandchild. Our parent process will kill it once it's done.
   363      int status;
   364      TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
   365      TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL);
   366      MaybeSave();
   367      _exit(0);
   368    }
   369    ASSERT_THAT(child_pid, SyscallSucceeds());
   370    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   371  
   372    // We should be able to attach to any thread in the grandchild.
   373    pid_t grandchild_tid1, grandchild_tid2;
   374    ASSERT_THAT(read(sockets[1], &grandchild_tid1, sizeof(grandchild_tid1)),
   375                SyscallSucceedsWithValue(sizeof(grandchild_tid1)));
   376    ASSERT_THAT(read(sockets[1], &grandchild_tid2, sizeof(grandchild_tid2)),
   377                SyscallSucceedsWithValue(sizeof(grandchild_tid2)));
   378  
   379    EXPECT_THAT(CheckPtraceAttach(grandchild_tid1), SyscallSucceeds());
   380    EXPECT_THAT(CheckPtraceAttach(grandchild_tid2), SyscallSucceeds());
   381  
   382    // Clean up grandchild.
   383    ASSERT_THAT(kill(grandchild_tid1, SIGKILL), SyscallSucceeds());
   384  
   385    // Clean up child.
   386    int status;
   387    ASSERT_THAT(waitpid(child_pid, &status, 0),
   388                SyscallSucceedsWithValue(child_pid));
   389    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   390        << " status " << status;
   391  }
   392  
   393  [[noreturn]] void RunTraceDescendantsAllowed(int fd) {
   394    // Let the tracer know our tid through the socket fd.
   395    pid_t const tid = gettid();
   396    TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid));
   397    MaybeSave();
   398  
   399    ScopedThread t([fd] {
   400      // See if any arbitrary thread (whose tid differs from the process id) can
   401      // be traced as well.
   402      pid_t const tid = gettid();
   403      TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid));
   404      MaybeSave();
   405      while (true) {
   406        SleepSafe(absl::Seconds(1));
   407      }
   408    });
   409  
   410    while (true) {
   411      SleepSafe(absl::Seconds(1));
   412    }
   413  }
   414  
   415  TEST(PtraceTest, PrctlSetPtracerInvalidPID) {
   416    // EINVAL should also be returned if PR_SET_PTRACER is not supported.
   417    EXPECT_THAT(prctl(PR_SET_PTRACER, 123456789), SyscallFailsWithErrno(EINVAL));
   418  }
   419  
   420  SimpleSubprocess CreatePtraceAttacherSubprocess() {
   421    return SimpleSubprocess("--ptrace_test_ptrace_attacher");
   422  }
   423  
   424  [[noreturn]] static void RunPtraceAttacher(int sockfd) {
   425    // execve() may have restored CAP_SYS_PTRACE if we had real UID 0.
   426    TEST_CHECK(SetCapability(CAP_SYS_PTRACE, false).ok());
   427    // Perform PTRACE_ATTACH in a separate thread to verify that permissions
   428    // apply process-wide.
   429    ScopedThread t([&] {
   430      while (true) {
   431        pid_t pid;
   432        int rv = read(sockfd, &pid, sizeof(pid));
   433        if (rv == 0) {
   434          _exit(0);
   435        }
   436        if (rv < 0) {
   437          _exit(1);
   438        }
   439        int resp_errno = 0;
   440        if (CheckPtraceAttach(pid) < 0) {
   441          resp_errno = errno;
   442        }
   443        TEST_PCHECK(write(sockfd, &resp_errno, sizeof(resp_errno)) ==
   444                    sizeof(resp_errno));
   445      }
   446    });
   447    while (true) {
   448      SleepSafe(absl::Seconds(1));
   449    }
   450  }
   451  
   452  SimpleSubprocess CreatePrctlSetPtracerSubprocess() {
   453    return SimpleSubprocess("--ptrace_test_prctl_set_ptracer");
   454  }
   455  
   456  [[noreturn]] static void RunPrctlSetPtracer(int sockfd) {
   457    // Perform prctl in a separate thread to verify that it applies
   458    // process-wide.
   459    ScopedThread t([&] {
   460      while (true) {
   461        pid_t pid;
   462        int rv = read(sockfd, &pid, sizeof(pid));
   463        if (rv == 0) {
   464          _exit(0);
   465        }
   466        if (rv < 0) {
   467          _exit(1);
   468        }
   469        int resp_errno = 0;
   470        if (prctl(PR_SET_PTRACER, pid) < 0) {
   471          resp_errno = errno;
   472        }
   473        TEST_PCHECK(write(sockfd, &resp_errno, sizeof(resp_errno)) ==
   474                    sizeof(resp_errno));
   475      }
   476    });
   477    while (true) {
   478      SleepSafe(absl::Seconds(1));
   479    }
   480  }
   481  
   482  TEST(PtraceTest, PrctlSetPtracer) {
   483    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   484  
   485    AutoCapability cap(CAP_SYS_PTRACE, false);
   486  
   487    // Ensure that initially, no tracer exception is set.
   488    ASSERT_THAT(prctl(PR_SET_PTRACER, 0), SyscallSucceeds());
   489  
   490    SimpleSubprocess tracee = CreatePrctlSetPtracerSubprocess();
   491    SimpleSubprocess tracer = CreatePtraceAttacherSubprocess();
   492  
   493    // By default, Yama should prevent tracer from tracing its parent (this
   494    // process) or siblings (tracee).
   495    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   496    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(EPERM));
   497  
   498    // If tracee invokes PR_SET_PTRACER on either tracer's pid, the pid of any of
   499    // its ancestors (i.e. us), or PR_SET_PTRACER_ANY, then tracer can trace it
   500    // (but not us).
   501  
   502    ASSERT_THAT(tracee.Cmd(tracer.pid()), PosixErrorIs(0));
   503    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(0));
   504    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   505  
   506    ASSERT_THAT(tracee.Cmd(gettid()), PosixErrorIs(0));
   507    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(0));
   508    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   509  
   510    ASSERT_THAT(tracee.Cmd(static_cast<pid_t>(PR_SET_PTRACER_ANY)),
   511                PosixErrorIs(0));
   512    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(0));
   513    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   514  
   515    // If tracee invokes PR_SET_PTRACER with pid 0, then tracer can no longer
   516    // trace it.
   517    ASSERT_THAT(tracee.Cmd(0), PosixErrorIs(0));
   518    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(EPERM));
   519  
   520    // If we invoke PR_SET_PTRACER with tracer's pid, then it can trace us (but
   521    // not our descendants).
   522    ASSERT_THAT(prctl(PR_SET_PTRACER, tracer.pid()), SyscallSucceeds());
   523    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(0));
   524    EXPECT_THAT(tracer.Cmd(tracee.pid()), PosixErrorIs(EPERM));
   525  
   526    // If we invoke PR_SET_PTRACER with pid 0, then tracer can no longer trace us.
   527    ASSERT_THAT(prctl(PR_SET_PTRACER, 0), SyscallSucceeds());
   528    EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   529  
   530    // Another thread in our thread group can invoke PR_SET_PTRACER instead; its
   531    // effect applies to the whole thread group.
   532    pid_t const our_tid = gettid();
   533    ScopedThread([&] {
   534      ASSERT_THAT(prctl(PR_SET_PTRACER, tracer.pid()), SyscallSucceeds());
   535      EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(0));
   536      EXPECT_THAT(tracer.Cmd(our_tid), PosixErrorIs(0));
   537  
   538      ASSERT_THAT(prctl(PR_SET_PTRACER, 0), SyscallSucceeds());
   539      EXPECT_THAT(tracer.Cmd(gettid()), PosixErrorIs(EPERM));
   540      EXPECT_THAT(tracer.Cmd(our_tid), PosixErrorIs(EPERM));
   541    }).Join();
   542  }
   543  
   544  // Tests that YAMA exceptions store tracees by thread group leader. Exceptions
   545  // are preserved even after the tracee thread exits, as long as the tracee's
   546  // thread group leader is still around.
   547  TEST(PtraceTest, PrctlSetPtracerPersistsPastTraceeThreadExit) {
   548    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   549    AutoCapability cap(CAP_SYS_PTRACE, false);
   550  
   551    // Use sockets to synchronize between tracer and tracee.
   552    int sockets[2];
   553    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   554  
   555    // Allocate vector before forking (not async-signal-safe).
   556    ExecveArray const owned_child_argv = {
   557        "/proc/self/exe",
   558        "--ptrace_test_prctl_set_ptracer_and_exit_tracee_thread",
   559        "--ptrace_test_fd", std::to_string(sockets[0])};
   560    char* const* const child_argv = owned_child_argv.get();
   561  
   562    pid_t const tracee_pid = fork();
   563    if (tracee_pid == 0) {
   564      // This test will create a new thread in the child process.
   565      // pthread_create(2) isn't async-signal-safe, so we execve() first.
   566      TEST_PCHECK(close(sockets[1]) == 0);
   567      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   568      TEST_PCHECK_MSG(false, "Survived execve to test child");
   569    }
   570    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   571    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   572  
   573    pid_t const tracer_pid = fork();
   574    if (tracer_pid == 0) {
   575      // Wait until the tracee thread calling prctl has terminated.
   576      char done;
   577      TEST_PCHECK(read(sockets[1], &done, 1) == 1);
   578      MaybeSave();
   579  
   580      TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
   581      _exit(0);
   582    }
   583    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   584  
   585    // Clean up tracer.
   586    int status;
   587    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   588    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   589        << " status " << status;
   590  
   591    // Clean up tracee.
   592    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   593    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   594                SyscallSucceedsWithValue(tracee_pid));
   595    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   596        << " status " << status;
   597  }
   598  
   599  [[noreturn]] void RunPrctlSetPtracerPersistsPastTraceeThreadExit(int fd) {
   600    ScopedThread t([] {
   601      TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
   602      MaybeSave();
   603    });
   604    t.Join();
   605    // Indicate that thread setting the prctl has exited.
   606    TEST_PCHECK(write(fd, "x", 1) == 1);
   607    MaybeSave();
   608  
   609    while (true) {
   610      SleepSafe(absl::Seconds(1));
   611    }
   612  }
   613  
   614  // Tests that YAMA exceptions store tracees by thread group leader. Exceptions
   615  // are preserved across exec as long as the thread group leader does not change,
   616  // even if the tracee thread is terminated.
   617  TEST(PtraceTest, PrctlSetPtracerPersistsPastLeaderExec) {
   618    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   619    AutoCapability cap(CAP_SYS_PTRACE, false);
   620  
   621    // Use sockets to synchronize between tracer and tracee.
   622    int sockets[2];
   623    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   624  
   625    // Allocate vector before forking (not async-signal-safe).
   626    ExecveArray const owned_child_argv = {
   627        "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd",
   628        std::to_string(sockets[0])};
   629    char* const* const child_argv = owned_child_argv.get();
   630  
   631    pid_t const tracee_pid = fork();
   632    if (tracee_pid == 0) {
   633      TEST_PCHECK(close(sockets[1]) == 0);
   634      TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
   635      MaybeSave();
   636  
   637      // This test will create a new thread in the child process.
   638      // pthread_create(2) isn't async-signal-safe, so we execve() first.
   639      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   640      TEST_PCHECK_MSG(false, "Survived execve to test child");
   641    }
   642    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   643    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   644  
   645    pid_t const tracer_pid = fork();
   646    if (tracer_pid == 0) {
   647      // Wait until the tracee has exec'd.
   648      char done;
   649      TEST_PCHECK(read(sockets[1], &done, 1) == 1);
   650      MaybeSave();
   651  
   652      TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
   653      _exit(0);
   654    }
   655    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   656  
   657    // Clean up tracer.
   658    int status;
   659    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   660    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   661        << " status " << status;
   662  
   663    // Clean up tracee.
   664    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   665    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   666                SyscallSucceedsWithValue(tracee_pid));
   667    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   668        << " status " << status;
   669  }
   670  
   671  [[noreturn]] void RunTracee(int fd) {
   672    // Indicate that we have exec'd.
   673    TEST_PCHECK(write(fd, "x", 1) == 1);
   674    MaybeSave();
   675  
   676    while (true) {
   677      SleepSafe(absl::Seconds(1));
   678    }
   679  }
   680  
   681  // Tests that YAMA exceptions store tracees by thread group leader. Exceptions
   682  // are cleared if the tracee process's thread group leader is terminated by
   683  // exec.
   684  TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastNonLeaderExec) {
   685    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   686    AutoCapability cap(CAP_SYS_PTRACE, false);
   687  
   688    // Use sockets to synchronize between tracer and tracee.
   689    int sockets[2];
   690    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   691  
   692    // Allocate vector before forking (not async-signal-safe).
   693    ExecveArray const owned_child_argv = {
   694        "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_and_exec_non_leader",
   695        "--ptrace_test_fd", std::to_string(sockets[0])};
   696    char* const* const child_argv = owned_child_argv.get();
   697  
   698    pid_t const tracee_pid = fork();
   699    if (tracee_pid == 0) {
   700      // This test will create a new thread in the child process.
   701      // pthread_create(2) isn't async-signal-safe, so we execve() first.
   702      TEST_PCHECK(close(sockets[1]) == 0);
   703      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   704      TEST_PCHECK_MSG(false, "Survived execve to test child");
   705    }
   706    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   707    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   708  
   709    pid_t const tracer_pid = fork();
   710    if (tracer_pid == 0) {
   711      // Wait until the tracee has exec'd.
   712      char done;
   713      TEST_PCHECK(read(sockets[1], &done, 1) == 1);
   714      MaybeSave();
   715  
   716      TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);
   717      TEST_PCHECK(errno == EPERM);
   718      _exit(0);
   719    }
   720    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   721  
   722    // Clean up tracer.
   723    int status;
   724    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   725    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   726        << " status " << status;
   727  
   728    // Clean up tracee.
   729    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   730    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   731                SyscallSucceedsWithValue(tracee_pid));
   732    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   733        << " status " << status;
   734  }
   735  
   736  [[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec(int fd) {
   737    ScopedThread t([fd] {
   738      TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
   739      MaybeSave();
   740  
   741      ExecveArray const owned_child_argv = {
   742          "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd",
   743          std::to_string(fd)};
   744      char* const* const child_argv = owned_child_argv.get();
   745  
   746      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   747      TEST_PCHECK_MSG(false, "Survived execve to test child");
   748    });
   749    t.Join();
   750    TEST_CHECK_MSG(false, "Survived execve? (main)");
   751    _exit(1);
   752  }
   753  
   754  // Tests that YAMA exceptions store the tracer itself rather than the thread
   755  // group leader. Exceptions are cleared when the tracer task exits, rather than
   756  // when its thread group leader exits.
   757  TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastTracerThreadExit) {
   758    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   759  
   760    // Use sockets to synchronize between tracer and tracee.
   761    int sockets[2];
   762    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   763  
   764    pid_t const tracee_pid = fork();
   765    if (tracee_pid == 0) {
   766      TEST_PCHECK(close(sockets[1]) == 0);
   767      pid_t tracer_tid;
   768      TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) ==
   769                  sizeof(tracer_tid));
   770      MaybeSave();
   771  
   772      TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0);
   773      MaybeSave();
   774      // Indicate that the prctl has been set.
   775      TEST_PCHECK(write(sockets[0], "x", 1) == 1);
   776      MaybeSave();
   777  
   778      while (true) {
   779        SleepSafe(absl::Seconds(1));
   780      }
   781    }
   782    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   783    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   784  
   785    // Allocate vector before forking (not async-signal-safe).
   786    ExecveArray const owned_child_argv = {
   787        "/proc/self/exe",
   788        "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread",
   789        "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid",
   790        std::to_string(tracee_pid),
   791        "--ptrace_test_fd",
   792        std::to_string(sockets[1])};
   793    char* const* const child_argv = owned_child_argv.get();
   794  
   795    pid_t const tracer_pid = fork();
   796    if (tracer_pid == 0) {
   797      // This test will create a new thread in the child process.
   798      // pthread_create(2) isn't async-signal-safe, so we execve() first.
   799      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   800      TEST_PCHECK_MSG(false, "Survived execve to test child");
   801    }
   802    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   803  
   804    // Clean up tracer.
   805    int status;
   806    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   807    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   808        << " status " << status;
   809  
   810    // Clean up tracee.
   811    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   812    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   813                SyscallSucceedsWithValue(tracee_pid));
   814    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   815        << " status " << status;
   816  }
   817  
   818  [[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit(
   819      int tracee_tid, int fd) {
   820    AutoCapability cap(CAP_SYS_PTRACE, false);
   821  
   822    ScopedThread t([fd] {
   823      pid_t const tracer_tid = gettid();
   824      TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)) ==
   825                  sizeof(tracer_tid));
   826  
   827      // Wait until the prctl has been set.
   828      char done;
   829      TEST_PCHECK(read(fd, &done, 1) == 1);
   830      MaybeSave();
   831    });
   832    t.Join();
   833  
   834    // Sleep for a bit before verifying the invalidation. The thread exit above
   835    // should cause the ptrace exception to be invalidated, but in Linux, this is
   836    // not done immediately. The YAMA exception is dropped during
   837    // __put_task_struct(), which occurs (at the earliest) one RCU grace period
   838    // after exit_notify() ==> release_task().
   839    SleepSafe(absl::Milliseconds(100));
   840  
   841    TEST_CHECK(CheckPtraceAttach(tracee_tid) == -1);
   842    TEST_PCHECK(errno == EPERM);
   843    _exit(0);
   844  }
   845  
   846  // Tests that YAMA exceptions store the tracer thread itself rather than the
   847  // thread group leader. Exceptions are preserved across exec in the tracer
   848  // thread, even if the thread group leader is terminated.
   849  TEST(PtraceTest, PrctlSetPtracerRespectsTracerThreadID) {
   850    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   851  
   852    // Use sockets to synchronize between tracer and tracee.
   853    int sockets[2];
   854    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   855  
   856    pid_t const tracee_pid = fork();
   857    if (tracee_pid == 0) {
   858      TEST_PCHECK(close(sockets[1]) == 0);
   859      pid_t tracer_tid;
   860      TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) ==
   861                  sizeof(tracer_tid));
   862      MaybeSave();
   863  
   864      TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0);
   865      MaybeSave();
   866      // Indicate that the prctl has been set.
   867      TEST_PCHECK(write(sockets[0], "x", 1) == 1);
   868      MaybeSave();
   869  
   870      while (true) {
   871        SleepSafe(absl::Seconds(1));
   872      }
   873    }
   874    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   875    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   876  
   877    // Allocate vector before forking (not async-signal-safe).
   878    ExecveArray const owned_child_argv = {
   879        "/proc/self/exe",
   880        "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id",
   881        "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid",
   882        std::to_string(tracee_pid),
   883        "--ptrace_test_fd",
   884        std::to_string(sockets[1])};
   885    char* const* const child_argv = owned_child_argv.get();
   886  
   887    pid_t const tracer_pid = fork();
   888    if (tracer_pid == 0) {
   889      // This test will create a new thread in the child process.
   890      // pthread_create(2) isn't async-signal-safe, so we execve() first.
   891      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   892      TEST_PCHECK_MSG(false, "Survived execve to test child");
   893    }
   894    ASSERT_THAT(tracer_pid, SyscallSucceeds());
   895  
   896    // Clean up tracer.
   897    int status;
   898    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
   899    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   900        << " status " << status;
   901  
   902    // Clean up tracee.
   903    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
   904    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
   905                SyscallSucceedsWithValue(tracee_pid));
   906    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
   907        << " status " << status;
   908  }
   909  
   910  [[noreturn]] void RunPrctlSetPtracerRespectsTracerThreadID(int tracee_tid,
   911                                                             int fd) {
   912    // Create a separate thread for tracing (i.e., not the thread group
   913    // leader). After the subsequent execve(), the current thread group leader
   914    // will no longer be exist, but the YAMA exception installed with this
   915    // thread should still be valid.
   916    ScopedThread t([tracee_tid, fd] {
   917      pid_t const tracer_tid = gettid();
   918      TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)));
   919      MaybeSave();
   920  
   921      // Wait until the tracee has made the PR_SET_PTRACER prctl.
   922      char done;
   923      TEST_PCHECK(read(fd, &done, 1) == 1);
   924      MaybeSave();
   925  
   926      ExecveArray const owned_child_argv = {
   927          "/proc/self/exe", "--ptrace_test_trace_tid", std::to_string(tracee_tid),
   928          "--ptrace_test_fd", std::to_string(fd)};
   929      char* const* const child_argv = owned_child_argv.get();
   930  
   931      execve(child_argv[0], child_argv, /* envp = */ nullptr);
   932      TEST_PCHECK_MSG(false, "Survived execve to test child");
   933    });
   934    t.Join();
   935    TEST_CHECK_MSG(false, "Survived execve? (main)");
   936    _exit(1);
   937  }
   938  
   939  [[noreturn]] void RunTraceTID(int tracee_tid, int fd) {
   940    TEST_PCHECK(SetCapability(CAP_SYS_PTRACE, false).ok());
   941    TEST_PCHECK(CheckPtraceAttach(tracee_tid) == 0);
   942    _exit(0);
   943  }
   944  
   945  // Tests that removing a YAMA exception does not affect a tracer that is already
   946  // attached.
   947  TEST(PtraceTest, PrctlClearPtracerDoesNotAffectCurrentTracer) {
   948    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
   949    AutoCapability cap(CAP_SYS_PTRACE, false);
   950  
   951    // Use sockets to synchronize between tracer and tracee.
   952    int sockets[2];
   953    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
   954  
   955    pid_t const tracee_pid = fork();
   956    if (tracee_pid == 0) {
   957      TEST_PCHECK(close(sockets[1]) == 0);
   958      TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
   959      MaybeSave();
   960      // Indicate that the prctl has been set.
   961      TEST_PCHECK(write(sockets[0], "x", 1) == 1);
   962      MaybeSave();
   963  
   964      // Wait until tracer has attached before clearing PR_SET_PTRACER.
   965      char done;
   966      TEST_PCHECK(read(sockets[0], &done, 1) == 1);
   967      MaybeSave();
   968  
   969      TEST_PCHECK(prctl(PR_SET_PTRACER, 0) == 0);
   970      MaybeSave();
   971      // Indicate that the prctl has been set.
   972      TEST_PCHECK(write(sockets[0], "x", 1) == 1);
   973      MaybeSave();
   974  
   975      while (true) {
   976        SleepSafe(absl::Seconds(1));
   977      }
   978    }
   979    ASSERT_THAT(tracee_pid, SyscallSucceeds());
   980    ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
   981  
   982    std::string mem_path = "/proc/" + std::to_string(tracee_pid) + "/mem";
   983    pid_t const tracer_pid = fork();
   984    if (tracer_pid == 0) {
   985      // Wait until tracee has called prctl, or else we won't be able to attach.
   986      char done;
   987      TEST_PCHECK(read(sockets[1], &done, 1) == 1);
   988      MaybeSave();
   989  
   990      TEST_PCHECK(ptrace(PTRACE_ATTACH, tracee_pid, 0, 0) == 0);
   991      MaybeSave();
   992      // Indicate that we have attached.
   993      TEST_PCHECK(write(sockets[1], &done, 1) == 1);
   994      MaybeSave();
   995  
   996      // Block until tracee enters signal-delivery-stop as a result of the
   997      // SIGSTOP sent by PTRACE_ATTACH.
   998      int status;
   999      TEST_PCHECK(waitpid(tracee_pid, &status, 0) == tracee_pid);
  1000      MaybeSave();
  1001      TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
  1002      MaybeSave();
  1003  
  1004      TEST_PCHECK(ptrace(PTRACE_CONT, tracee_pid, 0, 0) == 0);
  1005      MaybeSave();
  1006  
  1007      // Wait until tracee has cleared PR_SET_PTRACER. Even though it was cleared,
  1008      // we should still be able to access /proc/[pid]/mem because we are already
  1009      // attached.
  1010      TEST_PCHECK(read(sockets[1], &done, 1) == 1);
  1011      MaybeSave();
  1012      TEST_PCHECK(open(mem_path.c_str(), O_RDONLY) != -1);
  1013      MaybeSave();
  1014      _exit(0);
  1015    }
  1016    ASSERT_THAT(tracer_pid, SyscallSucceeds());
  1017  
  1018    // Clean up tracer.
  1019    int status;
  1020    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
  1021    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1022        << " status " << status;
  1023  
  1024    // Clean up tracee.
  1025    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
  1026    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
  1027                SyscallSucceedsWithValue(tracee_pid));
  1028    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1029        << " status " << status;
  1030  }
  1031  
  1032  TEST(PtraceTest, PrctlNotInherited) {
  1033    SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
  1034    AutoCapability cap(CAP_SYS_PTRACE, false);
  1035  
  1036    // Allow any ptracer. This should not affect the child processes.
  1037    ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds());
  1038  
  1039    pid_t const tracee_pid = fork();
  1040    if (tracee_pid == 0) {
  1041      while (true) {
  1042        SleepSafe(absl::Seconds(1));
  1043      }
  1044    }
  1045    ASSERT_THAT(tracee_pid, SyscallSucceeds());
  1046  
  1047    pid_t const tracer_pid = fork();
  1048    if (tracer_pid == 0) {
  1049      TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);
  1050      TEST_PCHECK(errno == EPERM);
  1051      _exit(0);
  1052    }
  1053    ASSERT_THAT(tracer_pid, SyscallSucceeds());
  1054  
  1055    // Clean up tracer.
  1056    int status;
  1057    ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
  1058    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1059        << " status " << status;
  1060  
  1061    // Clean up tracee.
  1062    ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
  1063    ASSERT_THAT(waitpid(tracee_pid, &status, 0),
  1064                SyscallSucceedsWithValue(tracee_pid));
  1065    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1066        << " status " << status;
  1067  }
  1068  
  1069  TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) {
  1070    // Yama prevents attaching to a parent. Skip the test if the scope is anything
  1071    // except disabled.
  1072    const int yama_scope = ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope());
  1073    SKIP_IF(yama_scope > 1);
  1074    if (yama_scope == 1) {
  1075      // Allow child to trace us.
  1076      ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds());
  1077    }
  1078  
  1079    // Test PTRACE_POKE/PEEKDATA on both anonymous and file mappings.
  1080    const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  1081    ASSERT_NO_ERRNO(Truncate(file.path(), kPageSize));
  1082    const FileDescriptor fd =
  1083        ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
  1084    const auto file_mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
  1085        nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
  1086  
  1087    constexpr long kBeforePokeDataAnonValue = 10;
  1088    constexpr long kAfterPokeDataAnonValue = 20;
  1089    constexpr long kBeforePokeDataFileValue = 0;  // implicit, due to truncate()
  1090    constexpr long kAfterPokeDataFileValue = 30;
  1091  
  1092    volatile long anon_word = kBeforePokeDataAnonValue;
  1093    auto* file_word_ptr = static_cast<volatile long*>(file_mapping.ptr());
  1094  
  1095    pid_t const child_pid = fork();
  1096    if (child_pid == 0) {
  1097      // In child process.
  1098  
  1099      // Attach to the parent.
  1100      pid_t const parent_pid = getppid();
  1101      TEST_PCHECK(ptrace(PTRACE_ATTACH, parent_pid, 0, 0) == 0);
  1102      MaybeSave();
  1103  
  1104      // Block until the parent enters signal-delivery-stop as a result of the
  1105      // SIGSTOP sent by PTRACE_ATTACH.
  1106      int status;
  1107      TEST_PCHECK(waitpid(parent_pid, &status, 0) == parent_pid);
  1108      MaybeSave();
  1109      TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
  1110  
  1111      // Replace the value of anon_word in the parent process with
  1112      // kAfterPokeDataAnonValue.
  1113      long parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, &anon_word, 0);
  1114      MaybeSave();
  1115      TEST_CHECK(parent_word == kBeforePokeDataAnonValue);
  1116      TEST_PCHECK(ptrace(PTRACE_POKEDATA, parent_pid, &anon_word,
  1117                         kAfterPokeDataAnonValue) == 0);
  1118      MaybeSave();
  1119  
  1120      // Replace the value pointed to by file_word_ptr in the mapped file with
  1121      // kAfterPokeDataFileValue, via the parent process' mapping.
  1122      parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, file_word_ptr, 0);
  1123      MaybeSave();
  1124      TEST_CHECK(parent_word == kBeforePokeDataFileValue);
  1125      TEST_PCHECK(ptrace(PTRACE_POKEDATA, parent_pid, file_word_ptr,
  1126                         kAfterPokeDataFileValue) == 0);
  1127      MaybeSave();
  1128  
  1129      // Detach from the parent and suppress the SIGSTOP. If the SIGSTOP is not
  1130      // suppressed, the parent will hang in group-stop, causing the test to time
  1131      // out.
  1132      TEST_PCHECK(ptrace(PTRACE_DETACH, parent_pid, 0, 0) == 0);
  1133      MaybeSave();
  1134      _exit(0);
  1135    }
  1136    // In parent process.
  1137    ASSERT_THAT(child_pid, SyscallSucceeds());
  1138  
  1139    // Wait for the child to complete.
  1140    int status;
  1141    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1142                SyscallSucceedsWithValue(child_pid));
  1143    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1144        << " status " << status;
  1145  
  1146    // Check that the child's PTRACE_POKEDATA was effective.
  1147    EXPECT_EQ(kAfterPokeDataAnonValue, anon_word);
  1148    EXPECT_EQ(kAfterPokeDataFileValue, *file_word_ptr);
  1149  }
  1150  
  1151  TEST(PtraceTest, GetSigMask) {
  1152    // glibc and the Linux kernel define a sigset_t with different sizes. To avoid
  1153    // creating a kernel_sigset_t and recreating all the modification functions
  1154    // (sigemptyset, etc), we just hardcode the kernel sigset size.
  1155    constexpr int kSizeofKernelSigset = 8;
  1156    constexpr int kBlockSignal = SIGUSR1;
  1157    sigset_t blocked;
  1158    sigemptyset(&blocked);
  1159    sigaddset(&blocked, kBlockSignal);
  1160  
  1161    pid_t const child_pid = fork();
  1162    if (child_pid == 0) {
  1163      // In child process.
  1164  
  1165      // Install a signal handler for kBlockSignal to avoid termination and block
  1166      // it.
  1167      TEST_PCHECK(signal(
  1168                      kBlockSignal, +[](int signo) {}) != SIG_ERR);
  1169      MaybeSave();
  1170      TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0);
  1171      MaybeSave();
  1172  
  1173      // Enable tracing.
  1174      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1175      MaybeSave();
  1176  
  1177      // This should be blocked.
  1178      RaiseSignal(kBlockSignal);
  1179  
  1180      // This should be suppressed by parent, who will change signal mask in the
  1181      // meantime, which means kBlockSignal should be delivered once this resumes.
  1182      RaiseSignal(SIGSTOP);
  1183  
  1184      _exit(0);
  1185    }
  1186    // In parent process.
  1187    ASSERT_THAT(child_pid, SyscallSucceeds());
  1188  
  1189    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1190    int status;
  1191    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1192                SyscallSucceedsWithValue(child_pid));
  1193    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1194        << " status " << status;
  1195  
  1196    // Get current signal mask.
  1197    sigset_t set;
  1198    EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, kSizeofKernelSigset, &set),
  1199                SyscallSucceeds());
  1200    EXPECT_THAT(blocked, EqualsSigset(set));
  1201  
  1202    // Try to get current signal mask with bad size argument.
  1203    EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, 0, nullptr),
  1204                SyscallFailsWithErrno(EINVAL));
  1205  
  1206    // Try to set bad signal mask.
  1207    sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
  1208    EXPECT_THAT(
  1209        ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, bad_addr),
  1210        SyscallFailsWithErrno(EFAULT));
  1211  
  1212    // Set signal mask to empty set.
  1213    sigset_t set1;
  1214    sigemptyset(&set1);
  1215    EXPECT_THAT(ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, &set1),
  1216                SyscallSucceeds());
  1217  
  1218    // Suppress SIGSTOP and resume the child. It should re-enter
  1219    // signal-delivery-stop for kBlockSignal.
  1220    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1221    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1222                SyscallSucceedsWithValue(child_pid));
  1223    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kBlockSignal)
  1224        << " status " << status;
  1225  
  1226    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1227    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1228                SyscallSucceedsWithValue(child_pid));
  1229    // Let's see that process exited normally.
  1230    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1231        << " status " << status;
  1232  }
  1233  
  1234  TEST(PtraceTest, GetSiginfo_SetSiginfo_SignalInjection) {
  1235    constexpr int kOriginalSigno = SIGUSR1;
  1236    constexpr int kInjectedSigno = SIGUSR2;
  1237  
  1238    pid_t const child_pid = fork();
  1239    if (child_pid == 0) {
  1240      // In child process.
  1241  
  1242      // Override all signal handlers.
  1243      struct sigaction sa = {};
  1244      sa.sa_handler = +[](int signo) { _exit(signo); };
  1245      TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
  1246      for (int signo = 1; signo < 32; signo++) {
  1247        if (signo == SIGKILL || signo == SIGSTOP) {
  1248          continue;
  1249        }
  1250        TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
  1251      }
  1252      for (int signo = SIGRTMIN; signo <= SIGRTMAX; signo++) {
  1253        TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
  1254      }
  1255  
  1256      // Unblock all signals.
  1257      TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
  1258      MaybeSave();
  1259  
  1260      // Send ourselves kOriginalSignal while ptraced and exit with the signal we
  1261      // actually receive via the signal handler, if any, or 0 if we don't receive
  1262      // a signal.
  1263      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1264      MaybeSave();
  1265      RaiseSignal(kOriginalSigno);
  1266      _exit(0);
  1267    }
  1268    // In parent process.
  1269    ASSERT_THAT(child_pid, SyscallSucceeds());
  1270  
  1271    // Wait for the child to send itself kOriginalSigno and enter
  1272    // signal-delivery-stop.
  1273    int status;
  1274    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1275                SyscallSucceedsWithValue(child_pid));
  1276    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
  1277        << " status " << status;
  1278  
  1279    siginfo_t siginfo = {};
  1280    ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
  1281                SyscallSucceeds());
  1282    EXPECT_EQ(kOriginalSigno, siginfo.si_signo);
  1283    EXPECT_EQ(SI_TKILL, siginfo.si_code);
  1284  
  1285    // Replace the signal with kInjectedSigno, and check that the child exits
  1286    // with kInjectedSigno, indicating that signal injection was successful.
  1287    siginfo.si_signo = kInjectedSigno;
  1288    ASSERT_THAT(ptrace(PTRACE_SETSIGINFO, child_pid, 0, &siginfo),
  1289                SyscallSucceeds());
  1290    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, kInjectedSigno),
  1291                SyscallSucceeds());
  1292    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1293                SyscallSucceedsWithValue(child_pid));
  1294    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == kInjectedSigno)
  1295        << " status " << status;
  1296  }
  1297  
  1298  TEST(PtraceTest, SIGKILLDoesNotCauseSignalDeliveryStop) {
  1299    pid_t const child_pid = fork();
  1300    if (child_pid == 0) {
  1301      // In child process.
  1302      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1303      MaybeSave();
  1304      RaiseSignal(SIGKILL);
  1305      TEST_CHECK_MSG(false, "Survived SIGKILL?");
  1306      _exit(1);
  1307    }
  1308    // In parent process.
  1309    ASSERT_THAT(child_pid, SyscallSucceeds());
  1310  
  1311    // Expect the child to die to SIGKILL without entering signal-delivery-stop.
  1312    int status;
  1313    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1314                SyscallSucceedsWithValue(child_pid));
  1315    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1316        << " status " << status;
  1317  }
  1318  
  1319  TEST(PtraceTest, PtraceKill) {
  1320    constexpr int kOriginalSigno = SIGUSR1;
  1321  
  1322    pid_t const child_pid = fork();
  1323    if (child_pid == 0) {
  1324      // In child process.
  1325      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1326      MaybeSave();
  1327  
  1328      // PTRACE_KILL only works if tracee has entered signal-delivery-stop.
  1329      RaiseSignal(kOriginalSigno);
  1330      TEST_CHECK_MSG(false, "Failed to kill the process?");
  1331      _exit(0);
  1332    }
  1333    // In parent process.
  1334    ASSERT_THAT(child_pid, SyscallSucceeds());
  1335  
  1336    // Wait for the child to send itself kOriginalSigno and enter
  1337    // signal-delivery-stop.
  1338    int status;
  1339    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1340                SyscallSucceedsWithValue(child_pid));
  1341    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
  1342        << " status " << status;
  1343  
  1344    ASSERT_THAT(ptrace(PTRACE_KILL, child_pid, 0, 0), SyscallSucceeds());
  1345  
  1346    // Expect the child to die with SIGKILL.
  1347    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1348                SyscallSucceedsWithValue(child_pid));
  1349    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1350        << " status " << status;
  1351  }
  1352  
  1353  TEST(PtraceTest, GetRegSet) {
  1354    pid_t const child_pid = fork();
  1355    if (child_pid == 0) {
  1356      // In child process.
  1357  
  1358      // Enable tracing.
  1359      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1360      MaybeSave();
  1361  
  1362      // Use kill explicitly because we check the syscall argument register below.
  1363      kill(getpid(), SIGSTOP);
  1364  
  1365      _exit(0);
  1366    }
  1367    // In parent process.
  1368    ASSERT_THAT(child_pid, SyscallSucceeds());
  1369  
  1370    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1371    int status;
  1372    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1373                SyscallSucceedsWithValue(child_pid));
  1374    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1375        << " status " << status;
  1376  
  1377    // Get the general registers.
  1378    struct user_regs_struct regs;
  1379    struct iovec iov;
  1380    iov.iov_base = &regs;
  1381    iov.iov_len = sizeof(regs);
  1382    EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
  1383                SyscallSucceeds());
  1384  
  1385    // Read exactly the full register set.
  1386    EXPECT_EQ(iov.iov_len, sizeof(regs));
  1387  
  1388  #if defined(__x86_64__)
  1389    // Child called kill(2), with SIGSTOP as arg 2.
  1390    EXPECT_EQ(regs.rsi, SIGSTOP);
  1391  #elif defined(__aarch64__)
  1392    EXPECT_EQ(regs.regs[1], SIGSTOP);
  1393  #endif
  1394  
  1395    // Suppress SIGSTOP and resume the child.
  1396    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1397    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1398                SyscallSucceedsWithValue(child_pid));
  1399    // Let's see that process exited normally.
  1400    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1401        << " status " << status;
  1402  }
  1403  
  1404  TEST(PtraceTest, AttachingConvertsGroupStopToPtraceStop) {
  1405    pid_t const child_pid = fork();
  1406    if (child_pid == 0) {
  1407      // In child process.
  1408      while (true) {
  1409        pause();
  1410      }
  1411    }
  1412    // In parent process.
  1413    ASSERT_THAT(child_pid, SyscallSucceeds());
  1414  
  1415    // SIGSTOP the child and wait for it to stop.
  1416    ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
  1417    int status;
  1418    ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED),
  1419                SyscallSucceedsWithValue(child_pid));
  1420    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1421        << " status " << status;
  1422  
  1423    // Attach to the child and expect it to re-enter a traced group-stop despite
  1424    // already being stopped.
  1425    ASSERT_THAT(ptrace(PTRACE_ATTACH, child_pid, 0, 0), SyscallSucceeds());
  1426    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1427                SyscallSucceedsWithValue(child_pid));
  1428    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1429        << " status " << status;
  1430  
  1431    // Verify that the child is ptrace-stopped by checking that it can receive
  1432    // ptrace commands requiring a ptrace-stop.
  1433    EXPECT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, 0), SyscallSucceeds());
  1434  
  1435    // Group-stop is distinguished from signal-delivery-stop by PTRACE_GETSIGINFO
  1436    // failing with EINVAL.
  1437    siginfo_t siginfo = {};
  1438    EXPECT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
  1439                SyscallFailsWithErrno(EINVAL));
  1440  
  1441    // Detach from the child and expect it to stay stopped without a notification.
  1442    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  1443    ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED | WNOHANG),
  1444                SyscallSucceedsWithValue(0));
  1445  
  1446    // Sending it SIGCONT should cause it to leave its stop.
  1447    ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
  1448    ASSERT_THAT(waitpid(child_pid, &status, WCONTINUED),
  1449                SyscallSucceedsWithValue(child_pid));
  1450    EXPECT_TRUE(WIFCONTINUED(status)) << " status " << status;
  1451  
  1452    // Clean up the child.
  1453    ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
  1454    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1455                SyscallSucceedsWithValue(child_pid));
  1456    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1457        << " status " << status;
  1458  }
  1459  
  1460  // Fixture for tests parameterized by whether or not to use PTRACE_O_TRACEEXEC.
  1461  class PtraceExecveTest : public ::testing::TestWithParam<bool> {
  1462   protected:
  1463    bool TraceExec() const { return GetParam(); }
  1464  };
  1465  
  1466  TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) {
  1467    ExecveArray const owned_child_argv = {"/proc/self/exe",
  1468                                          "--ptrace_test_execve_child"};
  1469    char* const* const child_argv = owned_child_argv.get();
  1470  
  1471    pid_t const child_pid = fork();
  1472    if (child_pid == 0) {
  1473      // In child process. The test relies on calling execve() in a non-leader
  1474      // thread; pthread_create() isn't async-signal-safe, so the safest way to
  1475      // do this is to execve() first, then enable tracing and run the expected
  1476      // child process behavior in the new subprocess.
  1477      execve(child_argv[0], child_argv, /* envp = */ nullptr);
  1478      TEST_PCHECK_MSG(false, "Survived execve to test child");
  1479    }
  1480    // In parent process.
  1481    ASSERT_THAT(child_pid, SyscallSucceeds());
  1482  
  1483    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1484    int status;
  1485    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1486                SyscallSucceedsWithValue(child_pid));
  1487    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1488        << " status " << status;
  1489  
  1490    // Enable PTRACE_O_TRACECLONE so we can get the ID of the child's non-leader
  1491    // thread, PTRACE_O_TRACEEXIT so we can observe the leader's death, and
  1492    // PTRACE_O_TRACEEXEC if required by the test. (The leader doesn't call
  1493    // execve, but options should be inherited across clone.)
  1494    long opts = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT;
  1495    if (TraceExec()) {
  1496      opts |= PTRACE_O_TRACEEXEC;
  1497    }
  1498    ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, opts), SyscallSucceeds());
  1499  
  1500    // Suppress the SIGSTOP and wait for the child's leader thread to report
  1501    // PTRACE_EVENT_CLONE. Get the new thread's ID from the event.
  1502    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1503    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1504                SyscallSucceedsWithValue(child_pid));
  1505    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_CLONE << 8), status >> 8);
  1506    unsigned long eventmsg;
  1507    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
  1508                SyscallSucceeds());
  1509    pid_t const nonleader_tid = eventmsg;
  1510    pid_t const leader_tid = child_pid;
  1511  
  1512    // The new thread should be ptraced and in signal-delivery-stop by SIGSTOP due
  1513    // to PTRACE_O_TRACECLONE.
  1514    //
  1515    // Before bf959931ddb88c4e4366e96dd22e68fa0db9527c "wait/ptrace: assume __WALL
  1516    // if the child is traced" (4.7) , waiting on it requires __WCLONE since, as a
  1517    // non-leader, its termination signal is 0. After, a standard wait is
  1518    // sufficient.
  1519    ASSERT_THAT(waitpid(nonleader_tid, &status, __WCLONE),
  1520                SyscallSucceedsWithValue(nonleader_tid));
  1521    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1522        << " status " << status;
  1523  
  1524    // Resume both child threads.
  1525    for (pid_t const tid : {leader_tid, nonleader_tid}) {
  1526      ASSERT_THAT(ptrace(PTRACE_CONT, tid, 0, 0), SyscallSucceeds());
  1527    }
  1528  
  1529    // The non-leader child thread should call execve, causing the leader thread
  1530    // to enter PTRACE_EVENT_EXIT with an apparent exit code of 0. At this point,
  1531    // the leader has not yet exited, so the non-leader should be blocked in
  1532    // execve.
  1533    ASSERT_THAT(waitpid(leader_tid, &status, 0),
  1534                SyscallSucceedsWithValue(leader_tid));
  1535    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
  1536    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
  1537                SyscallSucceeds());
  1538    EXPECT_TRUE(WIFEXITED(eventmsg) && WEXITSTATUS(eventmsg) == 0)
  1539        << " eventmsg " << eventmsg;
  1540    EXPECT_THAT(waitpid(nonleader_tid, &status, __WCLONE | WNOHANG),
  1541                SyscallSucceedsWithValue(0));
  1542  
  1543    // Allow the leader to continue exiting. This should allow the non-leader to
  1544    // complete its execve, causing the original leader to be reaped without
  1545    // further notice and the non-leader to steal its ID.
  1546    ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
  1547    ASSERT_THAT(waitpid(leader_tid, &status, 0),
  1548                SyscallSucceedsWithValue(leader_tid));
  1549    if (TraceExec()) {
  1550      // If PTRACE_O_TRACEEXEC was enabled, the execing thread should be in
  1551      // PTRACE_EVENT_EXEC-stop, with the event message set to its old thread ID.
  1552      EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXEC << 8), status >> 8);
  1553      ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
  1554                  SyscallSucceeds());
  1555      EXPECT_EQ(nonleader_tid, eventmsg);
  1556    } else {
  1557      // Otherwise, the execing thread should have received SIGTRAP and should now
  1558      // be in signal-delivery-stop.
  1559      EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
  1560          << " status " << status;
  1561    }
  1562  
  1563  #ifdef __x86_64__
  1564    {
  1565      // CS should be 0x33, indicating an 64-bit binary.
  1566      constexpr uint64_t kAMD64UserCS = 0x33;
  1567      EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid,
  1568                         offsetof(struct user_regs_struct, cs), 0),
  1569                  SyscallSucceedsWithValue(kAMD64UserCS));
  1570      struct user_regs_struct regs = {};
  1571      ASSERT_THAT(ptrace(PTRACE_GETREGS, leader_tid, 0, &regs),
  1572                  SyscallSucceeds());
  1573      EXPECT_EQ(kAMD64UserCS, regs.cs);
  1574    }
  1575  #endif  // defined(__x86_64__)
  1576  
  1577    // PTRACE_O_TRACEEXIT should have been inherited across execve. Send SIGKILL,
  1578    // which should end the PTRACE_EVENT_EXEC-stop or signal-delivery-stop and
  1579    // leave the child in PTRACE_EVENT_EXIT-stop.
  1580    ASSERT_THAT(kill(leader_tid, SIGKILL), SyscallSucceeds());
  1581    ASSERT_THAT(waitpid(leader_tid, &status, 0),
  1582                SyscallSucceedsWithValue(leader_tid));
  1583    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
  1584    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
  1585                SyscallSucceeds());
  1586    EXPECT_TRUE(WIFSIGNALED(eventmsg) && WTERMSIG(eventmsg) == SIGKILL)
  1587        << " eventmsg " << eventmsg;
  1588  
  1589    // End the PTRACE_EVENT_EXIT stop, allowing the child to exit.
  1590    ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
  1591    ASSERT_THAT(waitpid(leader_tid, &status, 0),
  1592                SyscallSucceedsWithValue(leader_tid));
  1593    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  1594        << " status " << status;
  1595  }
  1596  
  1597  [[noreturn]] void RunExecveChild() {
  1598    // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
  1599    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1600    MaybeSave();
  1601    RaiseSignal(SIGSTOP);
  1602    MaybeSave();
  1603  
  1604    // Call execve() in a non-leader thread. As long as execve() succeeds, what
  1605    // exactly we execve() shouldn't really matter, since the tracer should kill
  1606    // us after execve() completes.
  1607    ScopedThread t([&] {
  1608      ExecveArray const owned_child_argv = {"/proc/self/exe",
  1609                                            "--this_flag_shouldnt_exist"};
  1610      char* const* const child_argv = owned_child_argv.get();
  1611      execve(child_argv[0], child_argv, /* envp = */ nullptr);
  1612      TEST_PCHECK_MSG(false, "Survived execve? (thread)");
  1613    });
  1614    t.Join();
  1615    TEST_CHECK_MSG(false, "Survived execve? (main)");
  1616    _exit(1);
  1617  }
  1618  
  1619  INSTANTIATE_TEST_SUITE_P(TraceExec, PtraceExecveTest, ::testing::Bool());
  1620  
  1621  // This test has expectations on when syscall-enter/exit-stops occur that are
  1622  // violated if saving occurs, since saving interrupts all syscalls, causing
  1623  // premature syscall-exit.
  1624  TEST(PtraceTest, ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone) {
  1625    constexpr int kExitTraceeExitCode = 99;
  1626  
  1627    pid_t const child_pid = fork();
  1628    if (child_pid == 0) {
  1629      // In child process.
  1630  
  1631      // Block SIGCHLD so it doesn't interrupt wait4.
  1632      sigset_t mask;
  1633      TEST_PCHECK(sigemptyset(&mask) == 0);
  1634      TEST_PCHECK(sigaddset(&mask, SIGCHLD) == 0);
  1635      TEST_PCHECK(sigprocmask(SIG_SETMASK, &mask, nullptr) == 0);
  1636      MaybeSave();
  1637  
  1638      // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
  1639      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1640      MaybeSave();
  1641      RaiseSignal(SIGSTOP);
  1642      MaybeSave();
  1643  
  1644      // Spawn a vfork child that exits immediately, and reap it. Don't save
  1645      // after vfork since the parent expects to see wait4 as the next syscall.
  1646      pid_t const pid = vfork();
  1647      if (pid == 0) {
  1648        _exit(kExitTraceeExitCode);
  1649      }
  1650      TEST_PCHECK_MSG(pid > 0, "vfork failed");
  1651  
  1652      int status;
  1653      TEST_PCHECK(wait4(pid, &status, 0, nullptr) > 0);
  1654      MaybeSave();
  1655      TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == kExitTraceeExitCode);
  1656      _exit(0);
  1657    }
  1658    // In parent process.
  1659    ASSERT_THAT(child_pid, SyscallSucceeds());
  1660  
  1661    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1662    int status;
  1663    ASSERT_THAT(child_pid, SyscallSucceeds());
  1664    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1665                SyscallSucceedsWithValue(child_pid));
  1666    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1667        << " status " << status;
  1668  
  1669    // Enable PTRACE_O_TRACEVFORK so we can get the ID of the grandchild,
  1670    // PTRACE_O_TRACEVFORKDONE so we can observe PTRACE_EVENT_VFORK_DONE, and
  1671    // PTRACE_O_TRACESYSGOOD so syscall-enter/exit-stops are unambiguously
  1672    // indicated by a stop signal of SIGTRAP|0x80 rather than just SIGTRAP.
  1673    ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0,
  1674                       PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE |
  1675                           PTRACE_O_TRACESYSGOOD),
  1676                SyscallSucceeds());
  1677  
  1678    // Suppress the SIGSTOP and wait for the child to report PTRACE_EVENT_VFORK.
  1679    // Get the new process' ID from the event.
  1680    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1681    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1682                SyscallSucceedsWithValue(child_pid));
  1683    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK << 8), status >> 8);
  1684    unsigned long eventmsg;
  1685    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
  1686                SyscallSucceeds());
  1687    pid_t const grandchild_pid = eventmsg;
  1688  
  1689    // The grandchild should be traced by us and in signal-delivery-stop by
  1690    // SIGSTOP due to PTRACE_O_TRACEVFORK. This allows us to wait on it even
  1691    // though we're not its parent.
  1692    ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
  1693                SyscallSucceedsWithValue(grandchild_pid));
  1694    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1695        << " status " << status;
  1696  
  1697    // Resume the child with PTRACE_SYSCALL. Since the grandchild is still in
  1698    // signal-delivery-stop, the child should remain in vfork() waiting for the
  1699    // grandchild to exec or exit.
  1700    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1701    absl::SleepFor(absl::Seconds(1));
  1702    ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
  1703                SyscallSucceedsWithValue(0));
  1704  
  1705    // Suppress the grandchild's SIGSTOP and wait for the grandchild to exit. Pass
  1706    // WNOWAIT to waitid() so that we don't acknowledge the grandchild's exit yet.
  1707    ASSERT_THAT(ptrace(PTRACE_CONT, grandchild_pid, 0, 0), SyscallSucceeds());
  1708    siginfo_t siginfo = {};
  1709    ASSERT_THAT(waitid(P_PID, grandchild_pid, &siginfo, WEXITED | WNOWAIT),
  1710                SyscallSucceeds());
  1711    EXPECT_EQ(SIGCHLD, siginfo.si_signo);
  1712    EXPECT_EQ(CLD_EXITED, siginfo.si_code);
  1713    EXPECT_EQ(kExitTraceeExitCode, siginfo.si_status);
  1714    EXPECT_EQ(grandchild_pid, siginfo.si_pid);
  1715    EXPECT_EQ(getuid(), siginfo.si_uid);
  1716  
  1717    // The child should now be in PTRACE_EVENT_VFORK_DONE stop. The event
  1718    // message should still be the grandchild's PID.
  1719    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1720                SyscallSucceedsWithValue(child_pid));
  1721    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK_DONE << 8), status >> 8);
  1722    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
  1723                SyscallSucceeds());
  1724    EXPECT_EQ(grandchild_pid, eventmsg);
  1725  
  1726    // Resume the child with PTRACE_SYSCALL again and expect it to enter
  1727    // syscall-exit-stop for vfork() or clone(), either of which should return the
  1728    // grandchild's PID from the syscall. Aside from PTRACE_O_TRACESYSGOOD,
  1729    // syscall-stops are distinguished from signal-delivery-stop by
  1730    // PTRACE_GETSIGINFO returning a siginfo for which si_code == SIGTRAP or
  1731    // SIGTRAP|0x80.
  1732    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1733    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1734                SyscallSucceedsWithValue(child_pid));
  1735    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
  1736        << " status " << status;
  1737    ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
  1738                SyscallSucceeds());
  1739    EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
  1740        << "si_code = " << siginfo.si_code;
  1741  
  1742    {
  1743      struct user_regs_struct regs = {};
  1744      struct iovec iov;
  1745      iov.iov_base = &regs;
  1746      iov.iov_len = sizeof(regs);
  1747      EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
  1748                  SyscallSucceeds());
  1749  #if defined(__x86_64__)
  1750      EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone)
  1751          << "orig_rax = " << regs.orig_rax;
  1752      EXPECT_EQ(grandchild_pid, regs.rax);
  1753  #elif defined(__aarch64__)
  1754      EXPECT_TRUE(regs.regs[8] == SYS_clone) << "regs[8] = " << regs.regs[8];
  1755      EXPECT_EQ(grandchild_pid, regs.regs[0]);
  1756  #endif  // defined(__x86_64__)
  1757    }
  1758  
  1759    // After this point, the child will be making wait4 syscalls that will be
  1760    // interrupted by saving, so saving is not permitted. Note that this is
  1761    // explicitly released below once the grandchild exits.
  1762    DisableSave ds;
  1763  
  1764    // Resume the child with PTRACE_SYSCALL again and expect it to enter
  1765    // syscall-enter-stop for wait4().
  1766    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1767    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1768                SyscallSucceedsWithValue(child_pid));
  1769    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
  1770        << " status " << status;
  1771    ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
  1772                SyscallSucceeds());
  1773    EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
  1774        << "si_code = " << siginfo.si_code;
  1775  #ifdef __x86_64__
  1776    {
  1777      EXPECT_THAT(ptrace(PTRACE_PEEKUSER, child_pid,
  1778                         offsetof(struct user_regs_struct, orig_rax), 0),
  1779                  SyscallSucceedsWithValue(SYS_wait4));
  1780    }
  1781  #endif  // defined(__x86_64__)
  1782  
  1783    // Resume the child with PTRACE_SYSCALL again. Since the grandchild is
  1784    // waiting for the tracer (us) to acknowledge its exit first, wait4 should
  1785    // block.
  1786    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1787    absl::SleepFor(absl::Seconds(1));
  1788    ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
  1789                SyscallSucceedsWithValue(0));
  1790  
  1791    // Acknowledge the grandchild's exit.
  1792    ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
  1793                SyscallSucceedsWithValue(grandchild_pid));
  1794    ds.reset();
  1795  
  1796    // Now the child should enter syscall-exit-stop for wait4, returning with the
  1797    // grandchild's PID.
  1798    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1799                SyscallSucceedsWithValue(child_pid));
  1800    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
  1801        << " status " << status;
  1802    {
  1803      struct user_regs_struct regs = {};
  1804      struct iovec iov;
  1805      iov.iov_base = &regs;
  1806      iov.iov_len = sizeof(regs);
  1807      EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
  1808                  SyscallSucceeds());
  1809  #if defined(__x86_64__)
  1810      EXPECT_EQ(SYS_wait4, regs.orig_rax);
  1811      EXPECT_EQ(grandchild_pid, regs.rax);
  1812  #elif defined(__aarch64__)
  1813      EXPECT_EQ(SYS_wait4, regs.regs[8]);
  1814      EXPECT_EQ(grandchild_pid, regs.regs[0]);
  1815  #endif  // defined(__x86_64__)
  1816    }
  1817  
  1818    // Detach from the child and wait for it to exit.
  1819    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  1820    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1821                SyscallSucceedsWithValue(child_pid));
  1822    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1823        << " status " << status;
  1824  }
  1825  
// These tests require knowledge of architecture-specific syscall conventions.
  1827  #ifdef __x86_64__
  1828  TEST(PtraceTest, Int3) {
  1829    SKIP_IF(PlatformSupportInt3() == PlatformSupport::NotSupported);
  1830  
  1831    pid_t const child_pid = fork();
  1832    if (child_pid == 0) {
  1833      // In child process.
  1834  
  1835      // Enable tracing.
  1836      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1837  
  1838      // Interrupt 3 - trap to debugger
  1839      asm("int3");
  1840  
  1841      _exit(56);
  1842    }
  1843    // In parent process.
  1844    ASSERT_THAT(child_pid, SyscallSucceeds());
  1845  
  1846    int status;
  1847    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1848                SyscallSucceedsWithValue(child_pid));
  1849    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
  1850        << " status " << status;
  1851  
  1852    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  1853  
  1854    // The child should validate the injected return value and then exit normally.
  1855    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1856                SyscallSucceedsWithValue(child_pid));
  1857    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 56)
  1858        << " status " << status;
  1859  }
  1860  
  1861  TEST(PtraceTest, Sysemu_PokeUser) {
  1862    constexpr int kSysemuHelperFirstExitCode = 126;
  1863    constexpr uint64_t kSysemuInjectedExitGroupReturn = 42;
  1864  
  1865    pid_t const child_pid = fork();
  1866    if (child_pid == 0) {
  1867      // In child process.
  1868  
  1869      // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
  1870      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1871      RaiseSignal(SIGSTOP);
  1872  
  1873      // Try to exit_group, expecting the tracer to skip the syscall and set its
  1874      // own return value.
  1875      int const rv = syscall(SYS_exit_group, kSysemuHelperFirstExitCode);
  1876      TEST_PCHECK_MSG(rv == kSysemuInjectedExitGroupReturn,
  1877                      "exit_group returned incorrect value");
  1878  
  1879      _exit(0);
  1880    }
  1881    // In parent process.
  1882    ASSERT_THAT(child_pid, SyscallSucceeds());
  1883  
  1884    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1885    int status;
  1886    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1887                SyscallSucceedsWithValue(child_pid));
  1888    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1889        << " status " << status;
  1890  
  1891    // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop
  1892    // for its first exit_group syscall.
  1893    ASSERT_THAT(ptrace(kPtraceSysemu, child_pid, 0, 0), SyscallSucceeds());
  1894    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1895                SyscallSucceedsWithValue(child_pid));
  1896    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
  1897        << " status " << status;
  1898  
  1899    struct user_regs_struct regs = {};
  1900    ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
  1901    EXPECT_EQ(SYS_exit_group, regs.orig_rax);
  1902    EXPECT_EQ(-ENOSYS, regs.rax);
  1903    EXPECT_EQ(kSysemuHelperFirstExitCode, regs.rdi);
  1904  
  1905    // Replace the exit_group return value, then resume the child, which should
  1906    // automatically skip the syscall.
  1907    ASSERT_THAT(
  1908        ptrace(PTRACE_POKEUSER, child_pid, offsetof(struct user_regs_struct, rax),
  1909               kSysemuInjectedExitGroupReturn),
  1910        SyscallSucceeds());
  1911    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  1912  
  1913    // The child should validate the injected return value and then exit normally.
  1914    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1915                SyscallSucceedsWithValue(child_pid));
  1916    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  1917        << " status " << status;
  1918  }
  1919  
  1920  // This test also cares about syscall-exit-stop.
  1921  TEST(PtraceTest, ERESTART) {
  1922    constexpr int kSigno = SIGUSR1;
  1923  
  1924    pid_t const child_pid = fork();
  1925    if (child_pid == 0) {
  1926      // In child process.
  1927  
  1928      // Ignore, but unblock, kSigno.
  1929      struct sigaction sa = {};
  1930      sa.sa_handler = SIG_IGN;
  1931      TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
  1932      TEST_PCHECK(sigaction(kSigno, &sa, nullptr) == 0);
  1933      MaybeSave();
  1934      TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
  1935      MaybeSave();
  1936  
  1937      // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
  1938      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  1939      RaiseSignal(SIGSTOP);
  1940  
  1941      // Invoke the pause syscall, which normally should not return until we
  1942      // receive a signal that "either terminates the process or causes the
  1943      // invocation of a signal-catching function".
  1944      pause();
  1945  
  1946      _exit(0);
  1947    }
  1948    ASSERT_THAT(child_pid, SyscallSucceeds());
  1949  
  1950    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  1951    int status;
  1952    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1953                SyscallSucceedsWithValue(child_pid));
  1954    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  1955        << " status " << status;
  1956  
  1957    // After this point, the child's pause syscall will be interrupted by saving,
  1958    // so saving is not permitted. Note that this is explicitly released below
  1959    // once the child is stopped.
  1960    DisableSave ds;
  1961  
  1962    // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop for
  1963    // its pause syscall.
  1964    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1965    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1966                SyscallSucceedsWithValue(child_pid));
  1967    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
  1968        << " status " << status;
  1969  
  1970    struct user_regs_struct regs = {};
  1971    ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
  1972    EXPECT_EQ(SYS_pause, regs.orig_rax);
  1973    EXPECT_EQ(-ENOSYS, regs.rax);
  1974  
  1975    // Resume the child with PTRACE_SYSCALL and expect it to block in the pause
  1976    // syscall.
  1977    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  1978    absl::SleepFor(absl::Seconds(1));
  1979    ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
  1980                SyscallSucceedsWithValue(0));
  1981  
  1982    // Send the child kSigno, causing it to return ERESTARTNOHAND and enter
  1983    // syscall-exit-stop from the pause syscall.
  1984    constexpr int ERESTARTNOHAND = 514;
  1985    ASSERT_THAT(kill(child_pid, kSigno), SyscallSucceeds());
  1986    ASSERT_THAT(waitpid(child_pid, &status, 0),
  1987                SyscallSucceedsWithValue(child_pid));
  1988    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
  1989        << " status " << status;
  1990    ds.reset();
  1991  
  1992    ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
  1993    EXPECT_EQ(SYS_pause, regs.orig_rax);
  1994    EXPECT_EQ(-ERESTARTNOHAND, regs.rax);
  1995  
  1996    // Replace the return value from pause with 0, causing pause to not be
  1997    // restarted despite kSigno being ignored.
  1998    ASSERT_THAT(ptrace(PTRACE_POKEUSER, child_pid,
  1999                       offsetof(struct user_regs_struct, rax), 0),
  2000                SyscallSucceeds());
  2001  
  2002    // Detach from the child and wait for it to exit.
  2003    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  2004    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2005                SyscallSucceedsWithValue(child_pid));
  2006    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  2007        << " status " << status;
  2008  }
  2009  #endif  // defined(__x86_64__)
  2010  
  2011  TEST(PtraceTest, Seize_Interrupt_Listen) {
  2012    volatile long child_should_spin = 1;
  2013    pid_t const child_pid = fork();
  2014    if (child_pid == 0) {
  2015      // In child process.
  2016      while (child_should_spin) {
  2017        SleepSafe(absl::Seconds(1));
  2018      }
  2019      _exit(1);
  2020    }
  2021  
  2022    // In parent process.
  2023    ASSERT_THAT(child_pid, SyscallSucceeds());
  2024  
  2025    // Attach to the child with PTRACE_SEIZE; doing so should not stop the child.
  2026    ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, 0), SyscallSucceeds());
  2027    int status;
  2028    EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
  2029                SyscallSucceedsWithValue(0));
  2030  
  2031    // Stop the child with PTRACE_INTERRUPT.
  2032    ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds());
  2033    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2034                SyscallSucceedsWithValue(child_pid));
  2035    EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
  2036  
  2037    // Unset child_should_spin to verify that the child never leaves the spin
  2038    // loop.
  2039    ASSERT_THAT(ptrace(PTRACE_POKEDATA, child_pid, &child_should_spin, 0),
  2040                SyscallSucceeds());
  2041  
  2042    // Send SIGSTOP to the child, then resume it, allowing it to proceed to
  2043    // signal-delivery-stop.
  2044    ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
  2045    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
  2046    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2047                SyscallSucceedsWithValue(child_pid));
  2048    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  2049        << " status " << status;
  2050  
  2051    // Release the child from signal-delivery-stop without suppressing the
  2052    // SIGSTOP, causing it to enter group-stop.
  2053    ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, SIGSTOP), SyscallSucceeds());
  2054    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2055                SyscallSucceedsWithValue(child_pid));
  2056    EXPECT_EQ(SIGSTOP | (kPtraceEventStop << 8), status >> 8);
  2057  
  2058    // "The state of the tracee after PTRACE_LISTEN is somewhat of a gray area: it
  2059    // is not in any ptrace-stop (ptrace commands won't work on it, and it will
  2060    // deliver waitpid(2) notifications), but it also may be considered 'stopped'
  2061    // because it is not executing instructions (is not scheduled), and if it was
  2062    // in group-stop before PTRACE_LISTEN, it will not respond to signals until
  2063    // SIGCONT is received." - ptrace(2).
  2064    ASSERT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0), SyscallSucceeds());
  2065    EXPECT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0),
  2066                SyscallFailsWithErrno(ESRCH));
  2067    EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
  2068                SyscallSucceedsWithValue(0));
  2069    EXPECT_THAT(kill(child_pid, SIGTERM), SyscallSucceeds());
  2070    absl::SleepFor(absl::Seconds(1));
  2071    EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
  2072                SyscallSucceedsWithValue(0));
  2073  
  2074    // Send SIGCONT to the child, causing it to leave group-stop and re-trap due
  2075    // to PTRACE_LISTEN.
  2076    EXPECT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
  2077    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2078                SyscallSucceedsWithValue(child_pid));
  2079    EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
  2080  
  2081    // Detach the child and expect it to exit due to the SIGTERM we sent while
  2082    // it was stopped by PTRACE_LISTEN.
  2083    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  2084    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2085                SyscallSucceedsWithValue(child_pid));
  2086    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGTERM)
  2087        << " status " << status;
  2088  }
  2089  
  2090  TEST(PtraceTest, Interrupt_Listen_RequireSeize) {
  2091    pid_t const child_pid = fork();
  2092    if (child_pid == 0) {
  2093      // In child process.
  2094      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
  2095      MaybeSave();
  2096      raise(SIGSTOP);
  2097      _exit(0);
  2098    }
  2099    // In parent process.
  2100    ASSERT_THAT(child_pid, SyscallSucceeds());
  2101  
  2102    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
  2103    int status;
  2104    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2105                SyscallSucceedsWithValue(child_pid));
  2106    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
  2107        << " status " << status;
  2108  
  2109    // PTRACE_INTERRUPT and PTRACE_LISTEN should fail since the child wasn't
  2110    // attached with PTRACE_SEIZE, leaving the child in signal-delivery-stop.
  2111    EXPECT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0),
  2112                SyscallFailsWithErrno(EIO));
  2113    EXPECT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0),
  2114                SyscallFailsWithErrno(EIO));
  2115  
  2116    // Suppress SIGSTOP and detach from the child, expecting it to exit normally.
  2117    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
  2118    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2119                SyscallSucceedsWithValue(child_pid));
  2120    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  2121        << " status " << status;
  2122  }
  2123  
  2124  TEST(PtraceTest, SeizeSetOptions) {
  2125    pid_t const child_pid = fork();
  2126    if (child_pid == 0) {
  2127      // In child process.
  2128      while (true) {
  2129        SleepSafe(absl::Seconds(1));
  2130      }
  2131    }
  2132  
  2133    // In parent process.
  2134    ASSERT_THAT(child_pid, SyscallSucceeds());
  2135  
  2136    // Attach to the child with PTRACE_SEIZE while setting PTRACE_O_TRACESYSGOOD.
  2137    ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, PTRACE_O_TRACESYSGOOD),
  2138                SyscallSucceeds());
  2139  
  2140    // Stop the child with PTRACE_INTERRUPT.
  2141    ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds());
  2142    int status;
  2143    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2144                SyscallSucceedsWithValue(child_pid));
  2145    EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
  2146  
  2147    // Resume the child with PTRACE_SYSCALL and wait for it to enter
  2148    // syscall-enter-stop. The stop signal status from the syscall stop should be
  2149    // SIGTRAP|0x80, reflecting PTRACE_O_TRACESYSGOOD.
  2150    ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
  2151    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2152                SyscallSucceedsWithValue(child_pid));
  2153    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
  2154        << " status " << status;
  2155  
  2156    // Clean up the child.
  2157    ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
  2158    ASSERT_THAT(waitpid(child_pid, &status, 0),
  2159                SyscallSucceedsWithValue(child_pid));
  2160    if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) {
  2161      // "SIGKILL kills even within system calls (syscall-exit-stop is not
  2162      // generated prior to death by SIGKILL). The net effect is that SIGKILL
  2163      // always kills the process (all its threads), even if some threads of the
  2164      // process are ptraced." - ptrace(2). This is technically true, but...
  2165      //
  2166      // When we send SIGKILL to the child, kernel/signal.c:complete_signal() =>
  2167      // signal_wake_up(resume=1) kicks the tracee out of the syscall-enter-stop.
  2168      // The pending SIGKILL causes the syscall to be skipped, but the child
  2169      // thread still reports syscall-exit before checking for pending signals; in
  2170      // current kernels, this is
  2171      // arch/x86/entry/common.c:syscall_return_slowpath() =>
  2172      // syscall_slow_exit_work() =>
  2173      // include/linux/tracehook.h:tracehook_report_syscall_exit() =>
  2174      // ptrace_report_syscall() => kernel/signal.c:ptrace_notify() =>
  2175      // ptrace_do_notify() => ptrace_stop().
  2176      //
  2177      // ptrace_stop() sets the task's state to TASK_TRACED and the task's
  2178      // exit_code to SIGTRAP|0x80 (passed by ptrace_report_syscall()), then calls
  2179      // freezable_schedule(). freezable_schedule() eventually reaches
  2180      // __schedule(), which detects signal_pending_state() due to the pending
  2181      // SIGKILL, sets the task's state back to TASK_RUNNING, and returns without
  2182      // descheduling. Thus, the task never enters syscall-exit-stop. However, if
  2183      // our wait4() => kernel/exit.c:wait_task_stopped() racily observes the
  2184      // TASK_TRACED state and the non-zero exit code set by ptrace_stop() before
  2185      // __schedule() sets the state back to TASK_RUNNING, it will return the
  2186      // task's exit_code as status W_STOPCODE(SIGTRAP|0x80). So we get a spurious
  2187      // syscall-exit-stop notification, and need to wait4() again for task exit.
  2188      //
  2189      // gVisor is not susceptible to this race because
  2190      // kernel.Task.waitCollectTraceeStopLocked() checks specifically for an
  2191      // active ptraceStop, which is not initiated if SIGKILL is pending.
  2192      std::cout << "Observed syscall-exit after SIGKILL" << std::endl;
  2193      ASSERT_THAT(waitpid(child_pid, &status, 0),
  2194                  SyscallSucceedsWithValue(child_pid));
  2195    }
  2196    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
  2197        << " status " << status;
  2198  }
  2199  
  2200  TEST(PtraceTest, SetYAMAPtraceScope) {
  2201    SKIP_IF(IsRunningWithVFS1());
  2202  
  2203    // Do not modify the ptrace scope on the host.
  2204    SKIP_IF(!IsRunningOnGvisor());
  2205    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
  2206  
  2207    const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
  2208        Open(std::string(kYamaPtraceScopePath), O_RDWR));
  2209  
  2210    ASSERT_THAT(write(fd.get(), "0", 1), SyscallSucceedsWithValue(1));
  2211  
  2212    ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
  2213    std::vector<char> buf(10);
  2214    EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
  2215    EXPECT_STREQ(buf.data(), "0\n");
  2216  
  2217    // Test that a child can attach to its parent when ptrace_scope is 0.
  2218    AutoCapability cap(CAP_SYS_PTRACE, false);
  2219    pid_t const child_pid = fork();
  2220    if (child_pid == 0) {
  2221      TEST_PCHECK(CheckPtraceAttach(getppid()) == 0);
  2222      _exit(0);
  2223    }
  2224    ASSERT_THAT(child_pid, SyscallSucceeds());
  2225  
  2226    int status;
  2227    ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
  2228    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
  2229        << " status " << status;
  2230  
  2231    // Set ptrace_scope back to 1 (and try writing with a newline).
  2232    ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
  2233    ASSERT_THAT(write(fd.get(), "1\n", 2), SyscallSucceedsWithValue(2));
  2234  
  2235    ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
  2236    EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
  2237    EXPECT_STREQ(buf.data(), "1\n");
  2238  }
  2239  
  2240  }  // namespace
  2241  
  2242  }  // namespace testing
  2243  }  // namespace gvisor
  2244  
  2245  int main(int argc, char** argv) {
  2246    gvisor::testing::TestInit(&argc, &argv);
  2247  
  2248    if (absl::GetFlag(FLAGS_ptrace_test_execve_child)) {
  2249      gvisor::testing::RunExecveChild();
  2250    }
  2251  
  2252    int fd = absl::GetFlag(FLAGS_ptrace_test_fd);
  2253  
  2254    if (absl::GetFlag(FLAGS_ptrace_test_trace_descendants_allowed)) {
  2255      gvisor::testing::RunTraceDescendantsAllowed(fd);
  2256    }
  2257  
  2258    if (absl::GetFlag(FLAGS_ptrace_test_ptrace_attacher)) {
  2259      gvisor::testing::RunPtraceAttacher(fd);
  2260    }
  2261  
  2262    if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer)) {
  2263      gvisor::testing::RunPrctlSetPtracer(fd);
  2264    }
  2265  
  2266    if (absl::GetFlag(
  2267            FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracee_thread)) {
  2268      gvisor::testing::RunPrctlSetPtracerPersistsPastTraceeThreadExit(fd);
  2269    }
  2270  
  2271    if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_and_exec_non_leader)) {
  2272      gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec(
  2273          fd);
  2274    }
  2275  
  2276    if (absl::GetFlag(
  2277            FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread)) {
  2278      gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit(
  2279          absl::GetFlag(
  2280              FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid),
  2281          fd);
  2282    }
  2283  
  2284    if (absl::GetFlag(
  2285            FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id)) {
  2286      gvisor::testing::RunPrctlSetPtracerRespectsTracerThreadID(
  2287          absl::GetFlag(
  2288              FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid),
  2289          fd);
  2290    }
  2291  
  2292    if (absl::GetFlag(FLAGS_ptrace_test_tracee)) {
  2293      gvisor::testing::RunTracee(fd);
  2294    }
  2295  
  2296    int pid = absl::GetFlag(FLAGS_ptrace_test_trace_tid);
  2297    if (pid != -1) {
  2298      gvisor::testing::RunTraceTID(pid, fd);
  2299    }
  2300  
  2301    return gvisor::testing::RunAllTests();
  2302  }