github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/wait.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <signal.h>
    16  #include <sys/mman.h>
    17  #include <sys/ptrace.h>
    18  #include <sys/resource.h>
    19  #include <sys/time.h>
    20  #include <sys/types.h>
    21  #include <sys/wait.h>
    22  #include <unistd.h>
    23  
    24  #include <functional>
    25  #include <tuple>
    26  #include <vector>
    27  
    28  #include "gmock/gmock.h"
    29  #include "gtest/gtest.h"
    30  #include "absl/strings/str_cat.h"
    31  #include "absl/synchronization/mutex.h"
    32  #include "absl/time/clock.h"
    33  #include "absl/time/time.h"
    34  #include "test/util/cleanup.h"
    35  #include "test/util/file_descriptor.h"
    36  #include "test/util/logging.h"
    37  #include "test/util/multiprocess_util.h"
    38  #include "test/util/posix_error.h"
    39  #include "test/util/signal_util.h"
    40  #include "test/util/test_util.h"
    41  #include "test/util/thread_util.h"
    42  #include "test/util/time_util.h"
    43  
    44  using ::testing::UnorderedElementsAre;
    45  
    46  // These unit tests focus on the wait4(2) system call, but include basic
    47  // checks for the i386 waitpid(2) syscall, which is a subset of wait4(2).
    48  //
    49  // NOTE(b/22640830,b/27680907,b/29049891): Some functionality is not tested as
    50  // it is not currently supported by gVisor:
    51  // * Process groups.
    52  // * Core dump status (WCOREDUMP).
    53  //
    54  // Tests for waiting on stopped/continued children are in sigstop.cc.
    55  
    56  namespace gvisor {
    57  namespace testing {
    58  
    59  namespace {
    60  
    61  // The CloneChild function seems to need more than one page of stack space.
    62  static const size_t kStackSize = 2 * kPageSize;
    63  
    64  // The child thread created in CloneAndExit runs this function.
    65  // This child does not have the TLS setup, so it must not use glibc functions.
    66  int CloneChild(void* priv) {
    67    int64_t sleep = reinterpret_cast<int64_t>(priv);
    68    SleepSafe(absl::Seconds(sleep));
    69  
    70    // glibc's _exit(2) function wrapper will helpfully call exit_group(2),
    71    // exiting the entire process.
    72    syscall(__NR_exit, 0);
    73    return 1;
    74  }
    75  
    76  // ForkAndExit forks a child process which exits with exit_code, after
    77  // sleeping for the specified duration (seconds).
    78  pid_t ForkAndExit(int exit_code, int64_t sleep) {
    79    pid_t child = fork();
    80    if (child == 0) {
    81      SleepSafe(absl::Seconds(sleep));
    82      _exit(exit_code);
    83    }
    84    return child;
    85  }
    86  
    87  int64_t clock_gettime_nsecs(clockid_t id) {
    88    struct timespec ts;
    89    TEST_PCHECK(clock_gettime(id, &ts) == 0);
    90    return (ts.tv_sec * 1000000000 + ts.tv_nsec);
    91  }
    92  
    93  void spin(int64_t sec) {
    94    int64_t ns = sec * 1000000000;
    95    int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
    96    int64_t end = start + ns;
    97  
    98    do {
    99      constexpr int kLoopCount = 1000000;  // large and arbitrary
   100      // volatile to prevent the compiler from skipping this loop.
   101      for (volatile int i = 0; i < kLoopCount; i++) {
   102      }
   103    } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
   104  }
   105  
   106  // ForkSpinAndExit forks a child process which exits with exit_code, after
   107  // spinning for the specified duration (seconds).
   108  pid_t ForkSpinAndExit(int exit_code, int64_t spintime) {
   109    pid_t child = fork();
   110    if (child == 0) {
   111      spin(spintime);
   112      _exit(exit_code);
   113    }
   114    return child;
   115  }
   116  
   117  absl::Duration RusageCpuTime(const struct rusage& ru) {
   118    return absl::DurationFromTimeval(ru.ru_utime) +
   119           absl::DurationFromTimeval(ru.ru_stime);
   120  }
   121  
   122  // Returns the address of the top of the stack.
   123  // Free with FreeStack.
   124  uintptr_t AllocStack() {
   125    void* addr = mmap(nullptr, kStackSize, PROT_READ | PROT_WRITE,
   126                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   127  
   128    if (addr == MAP_FAILED) {
   129      return reinterpret_cast<uintptr_t>(MAP_FAILED);
   130    }
   131  
   132    return reinterpret_cast<uintptr_t>(addr) + kStackSize;
   133  }
   134  
   135  // Frees a stack page allocated with AllocStack.
   136  int FreeStack(uintptr_t addr) {
   137    addr -= kStackSize;
   138    return munmap(reinterpret_cast<void*>(addr), kPageSize);
   139  }
   140  
   141  // CloneAndExit clones a child thread, which exits with 0 after sleeping for
   142  // the specified duration (must be in seconds). extra_flags are ORed against
   143  // the standard clone(2) flags.
   144  int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) {
   145    return clone(CloneChild, reinterpret_cast<void*>(stack),
   146                 CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags,
   147                 reinterpret_cast<void*>(sleep));
   148  }
   149  
   150  // Simple wrappers around wait4(2) and waitid(2) that ignore interrupts.
   151  constexpr auto Wait4 = RetryEINTR(wait4);
   152  constexpr auto Waitid = RetryEINTR(waitid);
   153  
   154  // Fixture for tests parameterized by a function that waits for any child to
   155  // exit with the given options, checks that it exited with the given code, and
   156  // then returns its PID.
   157  //
   158  // N.B. These tests run in a multi-threaded environment. We assume that
   159  // background threads do not create child processes and are not themselves
   160  // created with clone(... | SIGCHLD). Either may cause these tests to
   161  // erroneously wait on child processes/threads.
   162  class WaitAnyChildTest : public ::testing::TestWithParam<
   163                               std::function<PosixErrorOr<pid_t>(int, int)>> {
   164   protected:
   165    PosixErrorOr<pid_t> WaitAny(int code) { return WaitAnyWithOptions(code, 0); }
   166  
   167    PosixErrorOr<pid_t> WaitAnyWithOptions(int code, int options) {
   168      return GetParam()(code, options);
   169    }
   170  };
   171  
   172  // Wait for any child to exit.
   173  TEST_P(WaitAnyChildTest, Fork) {
   174    pid_t child;
   175    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   176  
   177    EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
   178  }
   179  
   180  // Call wait4 for any process after the child has already exited.
   181  TEST_P(WaitAnyChildTest, AfterExit) {
   182    pid_t child;
   183    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   184  
   185    absl::SleepFor(absl::Seconds(5));
   186  
   187    EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
   188  }
   189  
   190  // Wait for multiple children to exit, waiting for either at a time.
   191  TEST_P(WaitAnyChildTest, MultipleFork) {
   192    pid_t child1, child2;
   193    ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
   194    ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
   195  
   196    std::vector<pid_t> pids;
   197    pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
   198    pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
   199    EXPECT_THAT(pids, UnorderedElementsAre(child1, child2));
   200  }
   201  
   202  // Wait for any child to exit.
   203  // A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
   204  // a forked process.
   205  TEST_P(WaitAnyChildTest, CloneSIGCHLD) {
   206    uintptr_t stack;
   207    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   208    auto free =
   209        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   210  
   211    int child;
   212    ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
   213  
   214    EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
   215  }
   216  
   217  // Wait for a child thread and process.
   218  TEST_P(WaitAnyChildTest, ForkAndClone) {
   219    pid_t process;
   220    ASSERT_THAT(process = ForkAndExit(0, 0), SyscallSucceeds());
   221  
   222    uintptr_t stack;
   223    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   224    auto free =
   225        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   226  
   227    int thread;
   228    // Send SIGCHLD for normal wait semantics.
   229    ASSERT_THAT(thread = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
   230  
   231    std::vector<pid_t> pids;
   232    pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
   233    pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
   234    EXPECT_THAT(pids, UnorderedElementsAre(process, thread));
   235  }
   236  
   237  // Return immediately if no child has exited.
   238  TEST_P(WaitAnyChildTest, WaitWNOHANG) {
   239    EXPECT_THAT(WaitAnyWithOptions(0, WNOHANG),
   240                PosixErrorIs(ECHILD, ::testing::_));
   241  }
   242  
   243  // Bad options passed
   244  TEST_P(WaitAnyChildTest, BadOption) {
   245    EXPECT_THAT(WaitAnyWithOptions(0, 123456),
   246                PosixErrorIs(EINVAL, ::testing::_));
   247  }
   248  
   249  TEST_P(WaitAnyChildTest, WaitedChildRusage) {
   250    struct rusage before;
   251    ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
   252  
   253    pid_t child;
   254    constexpr absl::Duration kSpin = absl::Seconds(3);
   255    ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
   256                SyscallSucceeds());
   257    ASSERT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
   258  
   259    struct rusage after;
   260    ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
   261  
   262    EXPECT_GE(RusageCpuTime(after) - RusageCpuTime(before), kSpin);
   263  }
   264  
   265  TEST_P(WaitAnyChildTest, IgnoredChildRusage) {
   266    // "POSIX.1-2001 specifies that if the disposition of SIGCHLD is
   267    // set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see
   268    // sigaction(2)), then children that terminate do not become zombies and a
   269    // call to wait() or waitpid() will block until all children have terminated,
   270    // and then fail with errno set to ECHILD." - waitpid(2)
   271    //
   272    // "RUSAGE_CHILDREN: Return resource usage statistics for all children of the
   273    // calling process that have terminated *and been waited for*." -
   274    // getrusage(2), emphasis added
   275  
   276    struct sigaction sa;
   277    sa.sa_handler = SIG_IGN;
   278    const auto cleanup_sigact =
   279        ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
   280  
   281    struct rusage before;
   282    ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
   283  
   284    const absl::Duration start =
   285        absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
   286  
   287    constexpr absl::Duration kSpin = absl::Seconds(3);
   288  
   289    // ForkAndSpin uses CLOCK_THREAD_CPUTIME_ID, which is lower resolution than,
   290    // and may diverge from, CLOCK_MONOTONIC, so we allow a small grace period but
   291    // still check that we blocked for a while.
   292    constexpr absl::Duration kSpinGrace = absl::Milliseconds(100);
   293  
   294    pid_t child;
   295    ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
   296                SyscallSucceeds());
   297    ASSERT_THAT(WaitAny(0), PosixErrorIs(ECHILD, ::testing::_));
   298    const absl::Duration end =
   299        absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
   300    EXPECT_GE(end - start, kSpin - kSpinGrace);
   301  
   302    struct rusage after;
   303    ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
   304    EXPECT_EQ(before.ru_utime.tv_sec, after.ru_utime.tv_sec);
   305    EXPECT_EQ(before.ru_utime.tv_usec, after.ru_utime.tv_usec);
   306    EXPECT_EQ(before.ru_stime.tv_sec, after.ru_stime.tv_sec);
   307    EXPECT_EQ(before.ru_stime.tv_usec, after.ru_stime.tv_usec);
   308  }
   309  
   310  INSTANTIATE_TEST_SUITE_P(
   311      Waiters, WaitAnyChildTest,
   312      ::testing::Values(
   313          [](int code, int options) -> PosixErrorOr<pid_t> {
   314            int status;
   315            auto const pid = Wait4(-1, &status, options, nullptr);
   316            MaybeSave();
   317            if (pid < 0) {
   318              return PosixError(errno, "wait4");
   319            }
   320            if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
   321              return PosixError(
   322                  EINVAL, absl::StrCat("unexpected wait status: got ", status,
   323                                       ", wanted ", code));
   324            }
   325            return static_cast<pid_t>(pid);
   326          },
   327          [](int code, int options) -> PosixErrorOr<pid_t> {
   328            siginfo_t si;
   329            auto const rv = Waitid(P_ALL, 0, &si, WEXITED | options);
   330            MaybeSave();
   331            if (rv < 0) {
   332              return PosixError(errno, "waitid");
   333            }
   334            if (si.si_signo != SIGCHLD) {
   335              return PosixError(
   336                  EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo,
   337                                       ", wanted ", SIGCHLD));
   338            }
   339            if (si.si_status != code) {
   340              return PosixError(
   341                  EINVAL, absl::StrCat("unexpected status: got ", si.si_status,
   342                                       ", wanted ", code));
   343            }
   344            if (si.si_code != CLD_EXITED) {
   345              return PosixError(EINVAL,
   346                                absl::StrCat("unexpected code: got ", si.si_code,
   347                                             ", wanted ", CLD_EXITED));
   348            }
   349            auto const uid = getuid();
   350            if (si.si_uid != uid) {
   351              return PosixError(EINVAL,
   352                                absl::StrCat("unexpected uid: got ", si.si_uid,
   353                                             ", wanted ", uid));
   354            }
   355            return static_cast<pid_t>(si.si_pid);
   356          }));
   357  
   358  // Fixture for tests parameterized by a (sysno, function) tuple. The function
   359  // takes the PID of a specific child to wait for, waits for it to exit, and
   360  // checks that it exits with the given code.
   361  class WaitSpecificChildTest
   362      : public ::testing::TestWithParam<
   363            std::tuple<int, std::function<PosixError(pid_t, int, int)>>> {
   364   protected:
   365    int Sysno() { return std::get<0>(GetParam()); }
   366  
   367    PosixError WaitForWithOptions(pid_t pid, int options, int code) {
   368      return std::get<1>(GetParam())(pid, options, code);
   369    }
   370  
   371    PosixError WaitFor(pid_t pid, int code) {
   372      return std::get<1>(GetParam())(pid, 0, code);
   373    }
   374  };
   375  
   376  // Wait for specific child to exit.
   377  TEST_P(WaitSpecificChildTest, Fork) {
   378    pid_t child;
   379    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   380  
   381    EXPECT_NO_ERRNO(WaitFor(child, 0));
   382  }
   383  
   384  // Non-zero exit codes are correctly propagated.
   385  TEST_P(WaitSpecificChildTest, NormalExit) {
   386    pid_t child;
   387    ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
   388  
   389    EXPECT_NO_ERRNO(WaitFor(child, 42));
   390  }
   391  
   392  // Wait for multiple children to exit.
   393  TEST_P(WaitSpecificChildTest, MultipleFork) {
   394    pid_t child1, child2;
   395    ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
   396    ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
   397  
   398    EXPECT_NO_ERRNO(WaitFor(child1, 0));
   399    EXPECT_NO_ERRNO(WaitFor(child2, 0));
   400  }
   401  
   402  // Wait for multiple children to exit, out of the order they were created.
   403  TEST_P(WaitSpecificChildTest, MultipleForkOutOfOrder) {
   404    pid_t child1, child2;
   405    ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
   406    ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
   407  
   408    EXPECT_NO_ERRNO(WaitFor(child2, 0));
   409    EXPECT_NO_ERRNO(WaitFor(child1, 0));
   410  }
   411  
   412  // Wait for specific child to exit, entering wait4 before the exit occurs.
   413  TEST_P(WaitSpecificChildTest, ForkSleep) {
   414    pid_t child;
   415    ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
   416  
   417    EXPECT_NO_ERRNO(WaitFor(child, 0));
   418  }
   419  
   420  // Wait should block until the child exits.
   421  TEST_P(WaitSpecificChildTest, ForkBlock) {
   422    pid_t child;
   423  
   424    auto start = absl::Now();
   425    ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
   426  
   427    EXPECT_NO_ERRNO(WaitFor(child, 0));
   428  
   429    EXPECT_GE(absl::Now() - start, absl::Seconds(5));
   430  }
   431  
   432  // Waiting after the child has already exited returns immediately.
   433  TEST_P(WaitSpecificChildTest, AfterExit) {
   434    pid_t child;
   435    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   436  
   437    absl::SleepFor(absl::Seconds(5));
   438  
   439    EXPECT_NO_ERRNO(WaitFor(child, 0));
   440  }
   441  
   442  // Wait for child of sibling thread.
   443  TEST_P(WaitSpecificChildTest, SiblingChildren) {
   444    absl::Mutex mu;
   445    pid_t child;
   446    bool ready = false;
   447    bool stop = false;
   448  
   449    ScopedThread t([&] {
   450      absl::MutexLock ml(&mu);
   451      EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   452      ready = true;
   453      mu.Await(absl::Condition(&stop));
   454    });
   455  
   456    // N.B. This must be declared after ScopedThread, so it is destructed first,
   457    // thus waking the thread.
   458    absl::MutexLock ml(&mu);
   459    mu.Await(absl::Condition(&ready));
   460  
   461    EXPECT_NO_ERRNO(WaitFor(child, 0));
   462  
   463    // Keep the sibling alive until after we've waited so the child isn't
   464    // reparented.
   465    stop = true;
   466  }
   467  
   468  // Waiting for child of sibling thread not allowed with WNOTHREAD.
   469  TEST_P(WaitSpecificChildTest, SiblingChildrenWNOTHREAD) {
   470    // Linux added WNOTHREAD support to waitid(2) in
   471    // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
   472    // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
   473    //
   474    // Skip the test if it isn't supported yet.
   475    if (Sysno() == SYS_waitid) {
   476      int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WNOTHREAD);
   477      SKIP_IF(ret < 0 && errno == EINVAL);
   478    }
   479  
   480    absl::Mutex mu;
   481    pid_t child;
   482    bool ready = false;
   483    bool stop = false;
   484  
   485    ScopedThread t([&] {
   486      absl::MutexLock ml(&mu);
   487      EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   488      ready = true;
   489      mu.Await(absl::Condition(&stop));
   490  
   491      // This thread can wait on child.
   492      EXPECT_NO_ERRNO(WaitForWithOptions(child, __WNOTHREAD, 0));
   493    });
   494  
   495    // N.B. This must be declared after ScopedThread, so it is destructed first,
   496    // thus waking the thread.
   497    absl::MutexLock ml(&mu);
   498    mu.Await(absl::Condition(&ready));
   499  
   500    // This thread can't wait on child.
   501    EXPECT_THAT(WaitForWithOptions(child, __WNOTHREAD, 0),
   502                PosixErrorIs(ECHILD, ::testing::_));
   503  
   504    // Keep the sibling alive until after we've waited so the child isn't
   505    // reparented.
   506    stop = true;
   507  }
   508  
   509  // Wait for specific child to exit.
   510  // A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
   511  // a forked process.
   512  TEST_P(WaitSpecificChildTest, CloneSIGCHLD) {
   513    uintptr_t stack;
   514    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   515    auto free =
   516        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   517  
   518    int child;
   519    ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
   520  
   521    EXPECT_NO_ERRNO(WaitFor(child, 0));
   522  }
   523  
   524  // Wait for specific child to exit.
   525  // A non-CLONE_THREAD child which does not send SIGCHLD upon exit can be waited
   526  // on, but returns ECHILD.
   527  TEST_P(WaitSpecificChildTest, CloneNoSIGCHLD) {
   528    uintptr_t stack;
   529    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   530    auto free =
   531        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   532  
   533    int child;
   534    ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
   535  
   536    EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_));
   537  }
   538  
   539  // Waiting after the child has already exited returns immediately.
   540  TEST_P(WaitSpecificChildTest, CloneAfterExit) {
   541    uintptr_t stack;
   542    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   543    auto free =
   544        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   545  
   546    int child;
   547    // Send SIGCHLD for normal wait semantics.
   548    ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
   549  
   550    absl::SleepFor(absl::Seconds(5));
   551  
   552    EXPECT_NO_ERRNO(WaitFor(child, 0));
   553  }
   554  
   555  // A CLONE_THREAD child cannot be waited on.
   556  TEST_P(WaitSpecificChildTest, CloneThread) {
   557    uintptr_t stack;
   558    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   559    auto free =
   560        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   561  
   562    int child;
   563    ASSERT_THAT(child = CloneAndExit(15, stack, CLONE_THREAD), SyscallSucceeds());
   564    auto start = absl::Now();
   565  
   566    EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_));
   567  
   568    // Ensure wait4 didn't block.
   569    EXPECT_LE(absl::Now() - start, absl::Seconds(10));
   570  
   571    // Since we can't wait on the child, we sleep to try to avoid freeing its
   572    // stack before it exits.
   573    absl::SleepFor(absl::Seconds(5));
   574  }
   575  
   576  // A child that does not send a SIGCHLD on exit may be waited on with
   577  // the __WCLONE flag.
   578  TEST_P(WaitSpecificChildTest, CloneWCLONE) {
   579    // Linux added WCLONE support to waitid(2) in
   580    // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
   581    // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
   582    //
   583    // Skip the test if it isn't supported yet.
   584    if (Sysno() == SYS_waitid) {
   585      int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE);
   586      SKIP_IF(ret < 0 && errno == EINVAL);
   587    }
   588  
   589    uintptr_t stack;
   590    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   591    auto free =
   592        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   593  
   594    int child;
   595    ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
   596  
   597    EXPECT_NO_ERRNO(WaitForWithOptions(child, __WCLONE, 0));
   598  }
   599  
   600  // A forked child cannot be waited on with WCLONE.
   601  TEST_P(WaitSpecificChildTest, ForkWCLONE) {
   602    // Linux added WCLONE support to waitid(2) in
   603    // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
   604    // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
   605    //
   606    // Skip the test if it isn't supported yet.
   607    if (Sysno() == SYS_waitid) {
   608      int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE);
   609      SKIP_IF(ret < 0 && errno == EINVAL);
   610    }
   611  
   612    pid_t child;
   613    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   614  
   615    EXPECT_THAT(WaitForWithOptions(child, WNOHANG | __WCLONE, 0),
   616                PosixErrorIs(ECHILD, ::testing::_));
   617  
   618    EXPECT_NO_ERRNO(WaitFor(child, 0));
   619  }
   620  
   621  // Any type of child can be waited on with WALL.
   622  TEST_P(WaitSpecificChildTest, WALL) {
   623    // Linux added WALL support to waitid(2) in
   624    // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
   625    // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
   626    //
   627    // Skip the test if it isn't supported yet.
   628    if (Sysno() == SYS_waitid) {
   629      int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WALL);
   630      SKIP_IF(ret < 0 && errno == EINVAL);
   631    }
   632  
   633    pid_t child;
   634    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   635  
   636    EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0));
   637  
   638    uintptr_t stack;
   639    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   640    auto free =
   641        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   642  
   643    ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
   644  
   645    EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0));
   646  }
   647  
   648  // Return ECHILD for bad child.
   649  TEST_P(WaitSpecificChildTest, BadChild) {
   650    EXPECT_THAT(WaitFor(42, 0), PosixErrorIs(ECHILD, ::testing::_));
   651  }
   652  
   653  // Wait for a child process that only exits after calling execve(2) from a
   654  // non-leader thread.
   655  TEST_P(WaitSpecificChildTest, AfterChildExecve) {
   656    ExecveArray const owned_child_argv = {"/bin/true"};
   657    char* const* const child_argv = owned_child_argv.get();
   658  
   659    uintptr_t stack;
   660    ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
   661    auto free =
   662        Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
   663  
   664    pid_t const child = fork();
   665    if (child == 0) {
   666      // Give the parent some time to start waiting.
   667      SleepSafe(absl::Seconds(5));
   668      // Pass CLONE_VFORK to block the original thread in the child process until
   669      // the clone thread calls execve, annihilating them both. (This means that
   670      // if clone returns at all, something went wrong.)
   671      //
   672      // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
   673      // x86_64 implementation is safe. See glibc
   674      // sysdeps/unix/sysv/linux/x86_64/clone.S.
   675      clone(
   676          +[](void* arg) {
   677            auto child_argv = static_cast<char* const*>(arg);
   678            execve(child_argv[0], child_argv, /* envp = */ nullptr);
   679            return errno;
   680          },
   681          reinterpret_cast<void*>(stack),
   682          CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
   683              CLONE_VFORK,
   684          const_cast<char**>(child_argv));
   685      _exit(errno);
   686    }
   687    ASSERT_THAT(child, SyscallSucceeds());
   688    EXPECT_NO_ERRNO(WaitFor(child, 0));
   689  }
   690  
   691  PosixError CheckWait4(pid_t pid, int options, int code) {
   692    int status;
   693    auto const rv = Wait4(pid, &status, options, nullptr);
   694    MaybeSave();
   695    if (rv < 0) {
   696      return PosixError(errno, "wait4");
   697    } else if (rv != pid) {
   698      return PosixError(
   699          EINVAL, absl::StrCat("unexpected pid: got ", rv, ", wanted ", pid));
   700    }
   701    if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
   702      return PosixError(EINVAL, absl::StrCat("unexpected wait status: got ",
   703                                             status, ", wanted ", code));
   704    }
   705    return NoError();
   706  };
   707  
   708  PosixError CheckWaitid(pid_t pid, int options, int code) {
   709    siginfo_t si;
   710    auto const rv = Waitid(P_PID, pid, &si, options | WEXITED);
   711    MaybeSave();
   712    if (rv < 0) {
   713      return PosixError(errno, "waitid");
   714    }
   715    if (si.si_pid != pid) {
   716      return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", si.si_pid,
   717                                             ", wanted ", pid));
   718    }
   719    if (si.si_signo != SIGCHLD) {
   720      return PosixError(EINVAL, absl::StrCat("unexpected signo: got ",
   721                                             si.si_signo, ", wanted ", SIGCHLD));
   722    }
   723    if (si.si_status != code) {
   724      return PosixError(EINVAL, absl::StrCat("unexpected status: got ",
   725                                             si.si_status, ", wanted ", code));
   726    }
   727    if (si.si_code != CLD_EXITED) {
   728      return PosixError(EINVAL, absl::StrCat("unexpected code: got ", si.si_code,
   729                                             ", wanted ", CLD_EXITED));
   730    }
   731    return NoError();
   732  }
   733  
   734  INSTANTIATE_TEST_SUITE_P(
   735      Waiters, WaitSpecificChildTest,
   736      ::testing::Values(std::make_tuple(SYS_wait4, CheckWait4),
   737                        std::make_tuple(SYS_waitid, CheckWaitid)));
   738  
   739  // WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit.
   740  TEST(WaitTest, SignalExit) {
   741    pid_t child;
   742    ASSERT_THAT(child = ForkAndExit(0, 10), SyscallSucceeds());
   743  
   744    EXPECT_THAT(kill(child, SIGKILL), SyscallSucceeds());
   745  
   746    int status;
   747    EXPECT_THAT(Wait4(child, &status, 0, nullptr),
   748                SyscallSucceedsWithValue(child));
   749  
   750    EXPECT_FALSE(WIFEXITED(status));
   751    EXPECT_TRUE(WIFSIGNALED(status));
   752    EXPECT_EQ(SIGKILL, WTERMSIG(status));
   753  }
   754  
   755  // waitid requires at least one option.
   756  TEST(WaitTest, WaitidOptions) {
   757    EXPECT_THAT(Waitid(P_ALL, 0, nullptr, 0), SyscallFailsWithErrno(EINVAL));
   758  }
   759  
   760  // waitid does not wait for a child to exit if not passed WEXITED.
   761  TEST(WaitTest, WaitidNoWEXITED) {
   762    pid_t child;
   763    ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
   764    EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WSTOPPED),
   765                SyscallFailsWithErrno(ECHILD));
   766    EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WEXITED), SyscallSucceeds());
   767  }
   768  
   769  // WNOWAIT allows the same wait result to be returned again.
   770  TEST(WaitTest, WaitidWNOWAIT) {
   771    pid_t child;
   772    ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
   773  
   774    siginfo_t info;
   775    ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED | WNOWAIT),
   776                SyscallSucceeds());
   777    EXPECT_EQ(child, info.si_pid);
   778    EXPECT_EQ(SIGCHLD, info.si_signo);
   779    EXPECT_EQ(CLD_EXITED, info.si_code);
   780    EXPECT_EQ(42, info.si_status);
   781  
   782    ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED), SyscallSucceeds());
   783    EXPECT_EQ(child, info.si_pid);
   784    EXPECT_EQ(SIGCHLD, info.si_signo);
   785    EXPECT_EQ(CLD_EXITED, info.si_code);
   786    EXPECT_EQ(42, info.si_status);
   787  
   788    EXPECT_THAT(Waitid(P_PID, child, &info, WEXITED),
   789                SyscallFailsWithErrno(ECHILD));
   790  }
   791  
   792  // waitpid(pid, status, options) is equivalent to
   793  // wait4(pid, status, options, nullptr).
   794  // This is a dedicated syscall on i386, glibc maps it to wait4 on amd64.
   795  TEST(WaitTest, WaitPid) {
   796    pid_t child;
   797    ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
   798  
   799    int status;
   800    EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
   801                SyscallSucceedsWithValue(child));
   802  
   803    EXPECT_TRUE(WIFEXITED(status));
   804    EXPECT_EQ(42, WEXITSTATUS(status));
   805  }
   806  
   807  // Test that signaling a zombie succeeds. This is a signals test that is in this
   808  // file for some reason.
   809  TEST(WaitTest, KillZombie) {
   810    pid_t child;
   811    ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
   812  
   813    // Sleep for three seconds to ensure the child has exited.
   814    absl::SleepFor(absl::Seconds(3));
   815  
   816    // The child is now a zombie. Check that killing it returns 0.
   817    EXPECT_THAT(kill(child, SIGTERM), SyscallSucceeds());
   818    EXPECT_THAT(kill(child, 0), SyscallSucceeds());
   819  
   820    EXPECT_THAT(Wait4(child, nullptr, 0, nullptr),
   821                SyscallSucceedsWithValue(child));
   822  }
   823  
   824  TEST(WaitTest, Wait4Rusage) {
   825    pid_t child;
   826    constexpr absl::Duration kSpin = absl::Seconds(3);
   827    ASSERT_THAT(child = ForkSpinAndExit(21, absl::ToInt64Seconds(kSpin)),
   828                SyscallSucceeds());
   829  
   830    int status;
   831    struct rusage rusage = {};
   832    ASSERT_THAT(Wait4(child, &status, 0, &rusage),
   833                SyscallSucceedsWithValue(child));
   834  
   835    EXPECT_TRUE(WIFEXITED(status));
   836    EXPECT_EQ(21, WEXITSTATUS(status));
   837  
   838    EXPECT_GE(RusageCpuTime(rusage), kSpin);
   839  }
   840  
   841  TEST(WaitTest, WaitidRusage) {
   842    pid_t child;
   843    constexpr absl::Duration kSpin = absl::Seconds(3);
   844    ASSERT_THAT(child = ForkSpinAndExit(27, absl::ToInt64Seconds(kSpin)),
   845                SyscallSucceeds());
   846  
   847    siginfo_t si = {};
   848    struct rusage rusage = {};
   849  
   850    // From waitid(2):
   851    // The  raw  waitid()  system  call  takes a fifth argument, of type
   852    // struct rusage *. If this argument is non-NULL, then  it  is  used
   853    // to return resource  usage  information  about  the  child,  in the
   854    // same manner as wait4(2).
   855    EXPECT_THAT(
   856        RetryEINTR(syscall)(SYS_waitid, P_PID, child, &si, WEXITED, &rusage),
   857        SyscallSucceeds());
   858    EXPECT_EQ(si.si_signo, SIGCHLD);
   859    EXPECT_EQ(si.si_code, CLD_EXITED);
   860    EXPECT_EQ(si.si_status, 27);
   861    EXPECT_EQ(si.si_pid, child);
   862  
   863    EXPECT_GE(RusageCpuTime(rusage), kSpin);
   864  }
   865  
   866  // After bf959931ddb88c4e4366e96dd22e68fa0db9527c ("wait/ptrace: assume __WALL
   867  // if the child is traced") (Linux 4.7), tracees are always eligible for
   868  // waiting, regardless of type.
   869  TEST(WaitTest, TraceeWALL) {
   870    int fds[2];
   871    ASSERT_THAT(pipe(fds), SyscallSucceeds());
   872    FileDescriptor rfd(fds[0]);
   873    FileDescriptor wfd(fds[1]);
   874  
   875    pid_t child = fork();
   876    if (child == 0) {
   877      // Child.
   878      rfd.reset();
   879  
   880      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == 0);
   881  
   882      // Notify parent that we're now a tracee.
   883      wfd.reset();
   884  
   885      _exit(0);
   886    }
   887    ASSERT_THAT(child, SyscallSucceeds());
   888  
   889    wfd.reset();
   890  
   891    // Wait for child to become tracee.
   892    char c;
   893    EXPECT_THAT(ReadFd(rfd.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0));
   894  
   895    // We can wait on the fork child with WCLONE, as it is a tracee.
   896    int status;
   897    if (IsRunningOnGvisor()) {
   898      ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr),
   899                  SyscallSucceedsWithValue(child));
   900  
   901      EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
   902    } else {
   903      // On older versions of Linux, we may get ECHILD.
   904      ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr),
   905                  ::testing::AnyOf(SyscallSucceedsWithValue(child),
   906                                   SyscallFailsWithErrno(ECHILD)));
   907    }
   908  }
   909  
   910  }  // namespace
   911  
   912  }  // namespace testing
   913  }  // namespace gvisor