github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/fork.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <errno.h>
    16  #include <fcntl.h>
    17  #include <sched.h>
    18  #include <stdlib.h>
    19  #include <sys/mman.h>
    20  #include <sys/stat.h>
    21  #include <sys/types.h>
    22  #include <unistd.h>
    23  
    24  #include <atomic>
    25  #include <cstdlib>
    26  
    27  #include "gtest/gtest.h"
    28  #include "absl/time/clock.h"
    29  #include "absl/time/time.h"
    30  #include "test/util/capability_util.h"
    31  #include "test/util/logging.h"
    32  #include "test/util/memory_util.h"
    33  #include "test/util/test_util.h"
    34  #include "test/util/thread_util.h"
    35  
    36  namespace gvisor {
    37  namespace testing {
    38  
    39  namespace {
    40  
    41  using ::testing::Ge;
    42  
    43  class ForkTest : public ::testing::Test {
    44   protected:
    45    // SetUp creates a populated, open file.
    46    void SetUp() override {
    47      // Make a shared mapping.
    48      shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
    49                                             MAP_SHARED | MAP_ANONYMOUS, -1, 0));
    50      ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED);
    51  
    52      // Make a private mapping.
    53      private_ =
    54          reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
    55                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
    56      ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED);
    57  
    58      // Make a pipe.
    59      ASSERT_THAT(pipe(pipes_), SyscallSucceeds());
    60    }
    61  
    62    // TearDown frees associated resources.
    63    void TearDown() override {
    64      EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds());
    65      EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds());
    66      EXPECT_THAT(close(pipes_[0]), SyscallSucceeds());
    67      EXPECT_THAT(close(pipes_[1]), SyscallSucceeds());
    68    }
    69  
    70    // Fork executes a clone system call.
    71    pid_t Fork() {
    72      pid_t pid = fork();
    73      MaybeSave();
    74      TEST_PCHECK_MSG(pid >= 0, "fork failed");
    75      return pid;
    76    }
    77  
    78    // Wait waits for the given pid and returns the exit status. If the child was
    79    // killed by a signal or an error occurs, then 256+signal is returned.
    80    int Wait(pid_t pid) {
    81      int status;
    82      while (true) {
    83        int rval = wait4(pid, &status, 0, NULL);
    84        if (rval < 0) {
    85          return rval;
    86        }
    87        if (rval != pid) {
    88          continue;
    89        }
    90        if (WIFEXITED(status)) {
    91          return WEXITSTATUS(status);
    92        }
    93        if (WIFSIGNALED(status)) {
    94          return 256 + WTERMSIG(status);
    95        }
    96      }
    97    }
    98  
    99    // Exit exits the proccess.
   100    void Exit(int code) {
   101      _exit(code);
   102  
   103      // Should never reach here. Since the exit above failed, we really don't
   104      // have much in the way of options to indicate failure. So we just try to
   105      // log an assertion failure to the logs. The parent process will likely
   106      // fail anyways if exit is not working.
   107      TEST_CHECK_MSG(false, "_exit returned");
   108    }
   109  
   110    // ReadByte reads a byte from the shared pipe.
   111    char ReadByte() {
   112      char val = -1;
   113      TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1);
   114      MaybeSave();
   115      return val;
   116    }
   117  
   118    // WriteByte writes a byte from the shared pipe.
   119    void WriteByte(char val) {
   120      TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1);
   121      MaybeSave();
   122    }
   123  
   124    // Shared pipe.
   125    int pipes_[2];
   126  
   127    // Shared mapping (one page).
   128    char* shared_;
   129  
   130    // Private mapping (one page).
   131    char* private_;
   132  };
   133  
   134  TEST_F(ForkTest, Simple) {
   135    pid_t child = Fork();
   136    if (child == 0) {
   137      Exit(0);
   138    }
   139    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   140  }
   141  
   142  TEST_F(ForkTest, ExitCode) {
   143    pid_t child = Fork();
   144    if (child == 0) {
   145      Exit(123);
   146    }
   147    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123));
   148    child = Fork();
   149    if (child == 0) {
   150      Exit(1);
   151    }
   152    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1));
   153  }
   154  
   155  TEST_F(ForkTest, Multi) {
   156    pid_t child1 = Fork();
   157    if (child1 == 0) {
   158      Exit(0);
   159    }
   160    pid_t child2 = Fork();
   161    if (child2 == 0) {
   162      Exit(1);
   163    }
   164    EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
   165    EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1));
   166  }
   167  
   168  TEST_F(ForkTest, Pipe) {
   169    pid_t child = Fork();
   170    if (child == 0) {
   171      WriteByte(1);
   172      Exit(0);
   173    }
   174    EXPECT_EQ(ReadByte(), 1);
   175    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   176  }
   177  
   178  TEST_F(ForkTest, SharedMapping) {
   179    pid_t child = Fork();
   180    if (child == 0) {
   181      // Wait for the parent.
   182      ReadByte();
   183      if (shared_[0] == 1) {
   184        Exit(0);
   185      }
   186      // Failed.
   187      Exit(1);
   188    }
   189    // Change the mapping.
   190    ASSERT_EQ(shared_[0], 0);
   191    shared_[0] = 1;
   192    // Unblock the child.
   193    WriteByte(0);
   194    // Did it work?
   195    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   196  }
   197  
   198  TEST_F(ForkTest, PrivateMapping) {
   199    pid_t child = Fork();
   200    if (child == 0) {
   201      // Wait for the parent.
   202      ReadByte();
   203      if (private_[0] == 0) {
   204        Exit(0);
   205      }
   206      // Failed.
   207      Exit(1);
   208    }
   209    // Change the mapping.
   210    ASSERT_EQ(private_[0], 0);
   211    private_[0] = 1;
   212    // Unblock the child.
   213    WriteByte(0);
   214    // Did it work?
   215    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   216  }
   217  
   218  // CPUID is x86 specific.
   219  #ifdef __x86_64__
   220  // Test that cpuid works after a fork.
   221  TEST_F(ForkTest, Cpuid) {
   222    pid_t child = Fork();
   223  
   224    // We should be able to determine the CPU vendor.
   225    ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor);
   226  
   227    if (child == 0) {
   228      Exit(0);
   229    }
   230    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   231  }
   232  #endif
   233  
   234  TEST_F(ForkTest, Mmap) {
   235    pid_t child = Fork();
   236  
   237    if (child == 0) {
   238      void* addr =
   239          mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   240      MaybeSave();
   241      Exit(addr == MAP_FAILED);
   242    }
   243  
   244    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   245  }
   246  
   247  static volatile int alarmed = 0;
   248  
   249  void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; }
   250  
   251  TEST_F(ForkTest, Alarm) {
   252    // Setup an alarm handler.
   253    struct sigaction sa;
   254    sa.sa_sigaction = AlarmHandler;
   255    sigfillset(&sa.sa_mask);
   256    sa.sa_flags = SA_SIGINFO;
   257    EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds());
   258  
   259    pid_t child = Fork();
   260  
   261    if (child == 0) {
   262      alarm(1);
   263      sleep(3);
   264      if (!alarmed) {
   265        Exit(1);
   266      }
   267      Exit(0);
   268    }
   269  
   270    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   271    EXPECT_EQ(0, alarmed);
   272  }
   273  
   274  // Child cannot affect parent private memory. Regression test for b/24137240.
   275  TEST_F(ForkTest, PrivateMemory) {
   276    std::atomic<uint32_t> local(0);
   277  
   278    pid_t child1 = Fork();
   279    if (child1 == 0) {
   280      local++;
   281  
   282      pid_t child2 = Fork();
   283      if (child2 == 0) {
   284        local++;
   285  
   286        TEST_CHECK(local.load() == 2);
   287  
   288        Exit(0);
   289      }
   290  
   291      TEST_PCHECK(Wait(child2) == 0);
   292      TEST_CHECK(local.load() == 1);
   293      Exit(0);
   294    }
   295  
   296    EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
   297    EXPECT_EQ(0, local.load());
   298  }
   299  
   300  // Kernel-accessed buffers should remain coherent across COW.
   301  //
   302  // The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates
   303  // differently. Regression test for b/33811887.
   304  TEST_F(ForkTest, COWSegment) {
   305    constexpr int kBufSize = 1024;
   306    char* read_buf = private_;
   307    char* touch = private_ + kPageSize / 2;
   308  
   309    std::string contents(kBufSize, 'a');
   310  
   311    ScopedThread t([&] {
   312      // Wait to be sure the parent is blocked in read.
   313      absl::SleepFor(absl::Seconds(3));
   314  
   315      // Fork to mark private pages for COW.
   316      //
   317      // Use fork directly rather than the Fork wrapper to skip the multi-threaded
   318      // check, and limit the child to async-signal-safe functions:
   319      //
   320      // "After a fork() in a multithreaded program, the child can safely call
   321      // only async-signal-safe functions (see signal(7)) until such time as it
   322      // calls execve(2)."
   323      //
   324      // Skip ASSERT in the child, as it isn't async-signal-safe.
   325      pid_t child = fork();
   326      if (child == 0) {
   327        // Wait to be sure parent touched memory.
   328        sleep(3);
   329        Exit(0);
   330      }
   331  
   332      // Check success only in the parent.
   333      ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0)));
   334  
   335      // Trigger COW on private page.
   336      *touch = 42;
   337  
   338      // Write to pipe. Parent should still be able to read this.
   339      EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize),
   340                  SyscallSucceedsWithValue(kBufSize));
   341  
   342      EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   343    });
   344  
   345    EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize),
   346                SyscallSucceedsWithValue(kBufSize));
   347    EXPECT_STREQ(contents.c_str(), read_buf);
   348  }
   349  
   350  TEST_F(ForkTest, SigAltStack) {
   351    std::vector<char> stack_mem(SIGSTKSZ);
   352    stack_t stack = {};
   353    stack.ss_size = SIGSTKSZ;
   354    stack.ss_sp = stack_mem.data();
   355    ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds());
   356  
   357    pid_t child = Fork();
   358  
   359    if (child == 0) {
   360      stack_t oss = {};
   361      TEST_PCHECK(sigaltstack(nullptr, &oss) == 0);
   362      MaybeSave();
   363  
   364      TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0);
   365      TEST_CHECK(oss.ss_size == SIGSTKSZ);
   366      TEST_CHECK(oss.ss_sp == stack.ss_sp);
   367  
   368      Exit(0);
   369    }
   370    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   371  }
   372  
   373  TEST_F(ForkTest, Affinity) {
   374    // Make a non-default cpumask.
   375    cpu_set_t parent_mask;
   376    EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
   377                SyscallSucceeds());
   378    // Knock out the lowest bit.
   379    for (unsigned int n = 0; n < CPU_SETSIZE; n++) {
   380      if (CPU_ISSET(n, &parent_mask)) {
   381        CPU_CLR(n, &parent_mask);
   382        break;
   383      }
   384    }
   385    EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
   386                SyscallSucceeds());
   387  
   388    pid_t child = Fork();
   389    if (child == 0) {
   390      cpu_set_t child_mask;
   391  
   392      int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask);
   393      MaybeSave();
   394      if (ret < 0) {
   395        Exit(-ret);
   396      }
   397  
   398      TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask));
   399  
   400      Exit(0);
   401    }
   402  
   403    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
   404  }
   405  
   406  TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) {
   407    // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a
   408    // single clone(2) or unshare(2) call, the user namespace is guaranteed to be
   409    // created first, giving the child (clone(2)) or caller (unshare(2))
   410    // privileges over the remaining namespaces created by the call. Thus, it is
   411    // possible for an unprivileged caller to specify this combination of flags."
   412    // - user_namespaces(7)
   413    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
   414    Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE(
   415        MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
   416    int child_pid;
   417    // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these
   418    // namespaces were implemented in Linux before user namespaces.
   419    ASSERT_THAT(
   420        child_pid = clone(
   421            +[](void*) { return 0; },
   422            reinterpret_cast<void*>(child_stack.addr() + kPageSize),
   423            CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD,
   424            /* arg = */ nullptr),
   425        SyscallSucceeds());
   426  
   427    int status;
   428    ASSERT_THAT(waitpid(child_pid, &status, 0),
   429                SyscallSucceedsWithValue(child_pid));
   430    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   431        << "status = " << status;
   432  }
   433  
   434  // Clone with CLONE_SETTLS and a non-canonical TLS address is rejected.
   435  TEST(CloneTest, NonCanonicalTLS) {
   436    constexpr uintptr_t kNonCanonical = 1ull << 48;
   437  
   438    // We need a valid address for the stack pointer. We'll never actually execute
   439    // on this.
   440    char stack;
   441  
   442    // The raw system call interface on x86-64 is:
   443    // long clone(unsigned long flags, void *stack,
   444    //            int *parent_tid, int *child_tid,
   445    //            unsigned long tls);
   446    //
   447    // While on arm64, the order of the last two arguments is reversed:
   448    // long clone(unsigned long flags, void *stack,
   449    //            int *parent_tid, unsigned long tls,
   450    //            int *child_tid);
   451  #if defined(__x86_64__)
   452    EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr,
   453                        nullptr, kNonCanonical),
   454                SyscallFailsWithErrno(EPERM));
   455  #elif defined(__aarch64__)
   456    EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr,
   457                        kNonCanonical, nullptr),
   458                SyscallFailsWithErrno(EPERM));
   459  #endif
   460  }
   461  
   462  }  // namespace
   463  }  // namespace testing
   464  }  // namespace gvisor