github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/mmap.cc

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <errno.h>
    16  #include <fcntl.h>
    17  #include <linux/magic.h>
    18  #include <linux/unistd.h>
    19  #include <signal.h>
    20  #include <stdio.h>
    21  #include <stdlib.h>
    22  #include <string.h>
    23  #include <sys/mman.h>
    24  #include <sys/resource.h>
    25  #include <sys/statfs.h>
    26  #include <sys/syscall.h>
    27  #include <sys/time.h>
    28  #include <sys/types.h>
    29  #include <sys/wait.h>
    30  #include <unistd.h>
    31  
    32  #include <vector>
    33  
    34  #include "gmock/gmock.h"
    35  #include "gtest/gtest.h"
    36  #include "absl/strings/escaping.h"
    37  #include "absl/strings/str_split.h"
    38  #include "test/util/cleanup.h"
    39  #include "test/util/file_descriptor.h"
    40  #include "test/util/fs_util.h"
    41  #include "test/util/memory_util.h"
    42  #include "test/util/multiprocess_util.h"
    43  #include "test/util/temp_path.h"
    44  #include "test/util/test_util.h"
    45  
    46  using ::testing::AnyOf;
    47  using ::testing::Eq;
    48  using ::testing::Gt;
    49  
    50  namespace gvisor {
    51  namespace testing {
    52  
    53  namespace {
    54  
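         // Returns the calling process's virtual memory size in bytes, derived
         // from the first field of /proc/self/statm (total program size, in
         // pages).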
    55  PosixErrorOr<int64_t> VirtualMemorySize() {
    56    ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm"));
    57    std::vector<std::string> parts = absl::StrSplit(contents, ' ');
    58    if (parts.empty()) {
    59      return PosixError(EINVAL, "Unable to parse /proc/self/statm");
    60    }
    61    ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0]));
    62    return pages * getpagesize();
    63  }
    64  
    65  class MMapTest : public ::testing::Test {
    66   protected:
    67    // Unmap mapping, if one was made.
    68    void TearDown() override {
    69      if (addr_) {
    70        EXPECT_THAT(Unmap(), SyscallSucceeds());
    71      }
    72    }
    73  
    74    // Remembers mapping, so it can be automatically unmapped.
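           // Returns the result of mmap(2) as a uintptr_t; MAP_FAILED comes back
           // as (uintptr_t)-1, so the Syscall* matchers can be applied directly
           // to the return value.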
    75    uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd,
    76                  off_t offset) {
    77      void* ret =
    78          mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset);
    79  
    80      if (ret != MAP_FAILED) {
    81        addr_ = ret;
    82        length_ = length;
    83      }
    84  
    85      return reinterpret_cast<uintptr_t>(ret);
    86    }
    87  
    88    // Unmap previous mapping
    89    int Unmap() {
    90      if (!addr_) {
    91        return -1;
    92      }
    93  
    94      int ret = munmap(addr_, length_);
    95  
    96      addr_ = nullptr;
    97      length_ = 0;
    98  
    99      return ret;
   100    }
   101  
   102    // Msync the mapping.
   103    int Msync() { return msync(addr_, length_, MS_SYNC); }
   104  
   105    // Mlock the mapping.
   106    int Mlock() { return mlock(addr_, length_); }
   107  
   108    // Munlock the mapping.
   109    int Munlock() { return munlock(addr_, length_); }
   110  
   111    int Protect(uintptr_t addr, size_t length, int prot) {
   112      return mprotect(reinterpret_cast<void*>(addr), length, prot);
   113    }
   114  
   115    void* addr_ = nullptr;
   116    size_t length_ = 0;
   117  };
   118  
   119  // Matches if arg contains the same contents as string str.
   120  MATCHER_P(EqualsMemory, str, "") {
   121    if (0 == memcmp(arg, str.c_str(), str.size())) {
   122      return true;
   123    }
   124  
   125    *result_listener << "Memory did not match. Got:\n"
   126                     << absl::BytesToHexString(
   127                            std::string(static_cast<char*>(arg), str.size()))
    128                     << "\nWant:\n"
   129                     << absl::BytesToHexString(str);
   130    return false;
   131  }
   132  
    133  // We can't map pipes; the read and write ends fail for different reasons.
   134  TEST_F(MMapTest, MapPipe) {
   135    int fds[2];
   136    ASSERT_THAT(pipe(fds), SyscallSucceeds());
   137    EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0),
   138                SyscallFailsWithErrno(ENODEV));
   139    EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0),
   140                SyscallFailsWithErrno(EACCES));
   141    ASSERT_THAT(close(fds[0]), SyscallSucceeds());
   142    ASSERT_THAT(close(fds[1]), SyscallSucceeds());
   143  }
   144  
    145  // It's very common to mmap /dev/zero: anonymous mappings aren't part of POSIX
    146  // (although they are widely supported), so a zero-initialized memory region
    147  // traditionally comes from a "file-backed" /dev/zero mapping.
   148  TEST_F(MMapTest, MapDevZeroShared) {
   149    // This test will verify that we're able to map a page backed by /dev/zero
   150    // as MAP_SHARED.
   151    const FileDescriptor dev_zero =
   152        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   153  
   154    // Test that we can create a RW SHARED mapping of /dev/zero.
   155    ASSERT_THAT(
   156        Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
   157        SyscallSucceeds());
   158  }
   159  
   160  TEST_F(MMapTest, MapDevZeroPrivate) {
   161    // This test will verify that we're able to map a page backed by /dev/zero
   162    // as MAP_PRIVATE.
   163    const FileDescriptor dev_zero =
   164        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   165  
    166    // Test that we can create a RW PRIVATE mapping of /dev/zero.
   167    ASSERT_THAT(
   168        Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
   169        SyscallSucceeds());
   170  }
   171  
   172  TEST_F(MMapTest, MapDevZeroNoPersistence) {
   173    // This test will verify that two independent mappings of /dev/zero do not
    174    // appear to share the same backing file.
   175  
   176    const FileDescriptor dev_zero1 =
   177        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   178    const FileDescriptor dev_zero2 =
   179        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   180  
   181    ASSERT_THAT(
   182        Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0),
   183        SyscallSucceeds());
   184  
   185    // Create a second mapping via the second /dev/zero fd.
   186    void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
   187                          dev_zero2.get(), 0);
   188    ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());
   189  
   190    // Always unmap.
   191    auto cleanup_psec_map = Cleanup(
   192        [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
   193  
   194    // Verify that we have independently addressed pages.
   195    ASSERT_NE(psec_map, addr_);
   196  
   197    std::string buf_zero(kPageSize, 0x00);
   198    std::string buf_ones(kPageSize, 0xFF);
   199  
   200    // Verify the first is actually all zeros after mmap.
   201    EXPECT_THAT(addr_, EqualsMemory(buf_zero));
   202  
   203    // Let's fill in the first mapping with 0xFF.
   204    memcpy(addr_, buf_ones.data(), kPageSize);
   205  
   206    // Verify that the memcpy actually stuck in the page.
   207    EXPECT_THAT(addr_, EqualsMemory(buf_ones));
   208  
   209    // Verify that it didn't affect the second page which should be all zeros.
   210    EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
   211  }
   212  
   213  TEST_F(MMapTest, MapDevZeroSharedMultiplePages) {
   214    // This will test that we're able to map /dev/zero over multiple pages.
   215    const FileDescriptor dev_zero =
   216        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   217  
    218    // Test that we can create a RW private mapping of /dev/zero over two pages.
   219    ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE,
   220                    dev_zero.get(), 0),
   221                SyscallSucceeds());
   222  
   223    std::string buf_zero(kPageSize * 2, 0x00);
   224    std::string buf_ones(kPageSize * 2, 0xFF);
   225  
   226    // Verify the two pages are actually all zeros after mmap.
   227    EXPECT_THAT(addr_, EqualsMemory(buf_zero));
   228  
   229    // Fill out the pages with all ones.
   230    memcpy(addr_, buf_ones.data(), kPageSize * 2);
   231  
   232    // Verify that the memcpy actually stuck in the pages.
   233    EXPECT_THAT(addr_, EqualsMemory(buf_ones));
   234  }
   235  
   236  TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) {
   237    // This test will verify that two independent mappings of /dev/zero do not
    238    // appear to share the same backing file even when mapped from the
   239    // same initial fd.
   240    const FileDescriptor dev_zero =
   241        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   242  
   243    ASSERT_THAT(
   244        Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
   245        SyscallSucceeds());
   246  
   247    // Create a second mapping via the same fd.
   248    void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
   249                          dev_zero.get(), 0);
    250    ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());
   251  
   252    // Always unmap.
   253    auto cleanup_psec_map = Cleanup(
   254        [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
   255  
   256    // Verify that we have independently addressed pages.
   257    ASSERT_NE(psec_map, addr_);
   258  
   259    std::string buf_zero(kPageSize, 0x00);
   260    std::string buf_ones(kPageSize, 0xFF);
   261  
   262    // Verify the first is actually all zeros after mmap.
   263    EXPECT_THAT(addr_, EqualsMemory(buf_zero));
   264  
   265    // Let's fill in the first mapping with 0xFF.
   266    memcpy(addr_, buf_ones.data(), kPageSize);
   267  
   268    // Verify that the memcpy actually stuck in the page.
   269    EXPECT_THAT(addr_, EqualsMemory(buf_ones));
   270  
   271    // Verify that it didn't affect the second page which should be all zeros.
   272    EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
   273  }
   274  
   275  TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
   276    SetupGvisorDeathTest();
   277  
   278    // This test will verify that we're able to map a page backed by /dev/zero
   279    // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV.
   280    // This test is redundant but given the special nature of /dev/zero mappings
   281    // it doesn't hurt.
   282    const FileDescriptor dev_zero =
   283        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   284  
   285    const auto rest = [&] {
   286      // Test that we can create a RW SHARED mapping of /dev/zero.
   287      TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
   288                      dev_zero.get(),
   289                      0) != reinterpret_cast<uintptr_t>(MAP_FAILED));
   290  
   291      // Confirm that accesses after the unmap result in a SIGSEGV.
   292      //
   293      // N.B. We depend on this process being single-threaded to ensure there
   294      // can't be another mmap to map addr before the dereference below.
   295      void* addr_saved = addr_;  // Unmap resets addr_.
   296      TEST_PCHECK(Unmap() == 0);
   297      *reinterpret_cast<volatile int*>(addr_saved) = 0xFF;
   298    };
   299  
   300    EXPECT_THAT(InForkedProcess(rest),
   301                IsPosixErrorOkAndHolds(AnyOf(Eq(W_EXITCODE(0, SIGSEGV)),
   302                                             Eq(W_EXITCODE(0, 128 + SIGSEGV)))));
   303  }
   304  
   305  TEST_F(MMapTest, MapDevZeroUnaligned) {
   306    const FileDescriptor dev_zero =
   307        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
   308    const size_t size = kPageSize + kPageSize / 2;
   309    const std::string buf_zero(size, 0x00);
   310  
   311    ASSERT_THAT(
   312        Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
   313        SyscallSucceeds());
   314    EXPECT_THAT(addr_, EqualsMemory(buf_zero));
   315    ASSERT_THAT(Unmap(), SyscallSucceeds());
   316  
   317    ASSERT_THAT(
   318        Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
   319        SyscallSucceeds());
   320    EXPECT_THAT(addr_, EqualsMemory(buf_zero));
   321  }
   322  
   323  // We can't map _some_ character devices.
   324  TEST_F(MMapTest, MapCharDevice) {
   325    const FileDescriptor cdevfd =
   326        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0));
   327    EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0),
   328                SyscallFailsWithErrno(ENODEV));
   329  }
   330  
   331  // We can't map directories.
   332  TEST_F(MMapTest, MapDirectory) {
   333    const FileDescriptor dirfd =
   334        ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0));
   335    EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0),
   336                SyscallFailsWithErrno(ENODEV));
   337  }
   338  
   339  // We can map *something*
   340  TEST_F(MMapTest, MapAnything) {
   341    EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   342                SyscallSucceedsWithValue(Gt(0)));
   343  }
   344  
   345  // Map length < PageSize allowed
   346  TEST_F(MMapTest, SmallMap) {
   347    EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   348                SyscallSucceeds());
   349  }
   350  
   351  // Hint address doesn't break anything.
   352  // Note: there is no requirement we actually get the hint address
   353  TEST_F(MMapTest, HintAddress) {
   354    EXPECT_THAT(
   355        Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   356        SyscallSucceeds());
   357  }
   358  
   359  // MAP_FIXED gives us exactly the requested address
   360  TEST_F(MMapTest, MapFixed) {
   361    EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE,
   362                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
   363                SyscallSucceedsWithValue(0x30000000));
   364  }
   365  
   366  // 64-bit addresses work too
   367  #if defined(__x86_64__) || defined(__aarch64__)
   368  TEST_F(MMapTest, MapFixed64) {
   369    EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE,
   370                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
   371                SyscallSucceedsWithValue(0x300000000000));
   372  }
   373  #endif
   374  
   375  // MAP_STACK allowed.
   376  // There isn't a good way to verify it did anything.
   377  TEST_F(MMapTest, MapStack) {
   378    EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
   379                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0),
   380                SyscallSucceeds());
   381  }
   382  
   383  // MAP_LOCKED allowed.
   384  // There isn't a good way to verify it did anything.
   385  TEST_F(MMapTest, MapLocked) {
   386    EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
   387                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0),
   388                SyscallSucceeds());
   389  }
   390  
   391  // MAP_PRIVATE or MAP_SHARED must be passed
   392  TEST_F(MMapTest, NotPrivateOrShared) {
   393    EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0),
   394                SyscallFailsWithErrno(EINVAL));
   395  }
   396  
   397  // Only one of MAP_PRIVATE or MAP_SHARED may be passed
   398  TEST_F(MMapTest, PrivateAndShared) {
   399    EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
   400                    MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0),
   401                SyscallFailsWithErrno(EINVAL));
   402  }
   403  
   404  TEST_F(MMapTest, FixedAlignment) {
   405    // Addr must be page aligned (MAP_FIXED)
   406    EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE,
   407                    MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0),
   408                SyscallFailsWithErrno(EINVAL));
   409  }
   410  
   411  // Non-MAP_FIXED address does not need to be page aligned
   412  TEST_F(MMapTest, NonFixedAlignment) {
   413    EXPECT_THAT(
   414        Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   415        SyscallSucceeds());
   416  }
   417  
   418  // Length = 0 results in EINVAL.
   419  TEST_F(MMapTest, InvalidLength) {
   420    EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   421                SyscallFailsWithErrno(EINVAL));
   422  }
   423  
   424  // Bad fd not allowed.
   425  TEST_F(MMapTest, BadFd) {
   426    EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0),
   427                SyscallFailsWithErrno(EBADF));
   428  }
   429  
   430  // Mappings are writable.
   431  TEST_F(MMapTest, ProtWrite) {
   432    uint64_t addr;
   433    constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
   434  
   435    EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   436                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   437                SyscallSucceeds());
   438  
   439    // This shouldn't cause a SIGSEGV.
   440    memset(reinterpret_cast<void*>(addr), 42, kPageSize);
   441  
   442    // The written data should actually be there.
   443    EXPECT_EQ(
   444        0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
   445  }
   446  
   447  // "Write-only" mappings are writable *and* readable.
   448  TEST_F(MMapTest, ProtWriteOnly) {
   449    uint64_t addr;
   450    constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
   451  
   452    EXPECT_THAT(
   453        addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   454        SyscallSucceeds());
   455  
   456    // This shouldn't cause a SIGSEGV.
   457    memset(reinterpret_cast<void*>(addr), 42, kPageSize);
   458  
   459    // The written data should actually be there.
   460    EXPECT_EQ(
   461        0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
   462  }
   463  
   464  // "Write-only" mappings are readable.
   465  //
   466  // This is distinct from above to ensure the page is accessible even if the
   467  // initial fault is a write fault.
   468  TEST_F(MMapTest, ProtWriteOnlyReadable) {
   469    uint64_t addr;
   470    constexpr uint64_t kFirstWord = 0;
   471  
   472    EXPECT_THAT(
   473        addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   474        SyscallSucceeds());
   475  
   476    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord,
   477                        sizeof(kFirstWord)));
   478  }
   479  
   480  // Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE.
   481  TEST_F(MMapTest, ProtectProtWrite) {
   482    uint64_t addr;
   483    constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
   484  
   485    EXPECT_THAT(
   486        addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   487        SyscallSucceeds());
   488  
   489    ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
   490                SyscallSucceeds());
   491  
   492    // This shouldn't cause a SIGSEGV.
   493    memset(reinterpret_cast<void*>(addr), 42, kPageSize);
   494  
   495    // The written data should actually be there.
   496    EXPECT_EQ(
   497        0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
   498  }
   499  
   500  // SIGSEGV raised when reading PROT_NONE memory
   501  TEST_F(MMapTest, ProtNoneDeath) {
   502    SetupGvisorDeathTest();
   503  
   504    uintptr_t addr;
   505  
   506    ASSERT_THAT(
   507        addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   508        SyscallSucceeds());
   509  
   510    EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr),
   511                ::testing::KilledBySignal(SIGSEGV), "");
   512  }
   513  
   514  // SIGSEGV raised when writing PROT_READ only memory
   515  TEST_F(MMapTest, ReadOnlyDeath) {
   516    SetupGvisorDeathTest();
   517  
   518    uintptr_t addr;
   519  
   520    ASSERT_THAT(
   521        addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   522        SyscallSucceeds());
   523  
   524    EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42,
   525                ::testing::KilledBySignal(SIGSEGV), "");
   526  }
   527  
   528  // Writable mapping mprotect'd to read-only should not be writable.
   529  TEST_F(MMapTest, MprotectReadOnlyDeath) {
   530    SetupGvisorDeathTest();
   531  
   532    uintptr_t addr;
   533  
   534    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   535                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   536                SyscallSucceeds());
   537  
   538    volatile int* val = reinterpret_cast<int*>(addr);
   539  
   540    // Copy to ensure page is mapped in.
   541    *val = 42;
   542  
   543    ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds());
   544  
   545    // Now it shouldn't be writable.
   546    EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), "");
   547  }
   548  
    549  // Verify that calling mprotect on an address that's not page aligned fails.
   550  TEST_F(MMapTest, MprotectNotPageAligned) {
   551    uintptr_t addr;
   552  
   553    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   554                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   555                SyscallSucceeds());
   556    ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ),
   557                SyscallFailsWithErrno(EINVAL));
   558  }
   559  
   560  // Verify that calling mprotect with an absurdly huge length fails.
   561  TEST_F(MMapTest, MprotectHugeLength) {
   562    uintptr_t addr;
   563  
   564    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   565                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   566                SyscallSucceeds());
   567    ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ),
   568                SyscallFailsWithErrno(ENOMEM));
   569  }
   570  
   571  #if defined(__x86_64__) || defined(__i386__)
   572  // This code is equivalent in 32 and 64-bit mode
   573  const uint8_t machine_code[] = {
   574      0xb8, 0x2a, 0x00, 0x00, 0x00,  // movl $42, %eax
   575      0xc3,                          // retq
   576  };
   577  #elif defined(__aarch64__)
   578  const uint8_t machine_code[] = {
   579      0x40, 0x05, 0x80, 0x52,  // mov w0, #42
   580      0xc0, 0x03, 0x5f, 0xd6,  // ret
   581  };
   582  #endif
   583  
   584  // PROT_EXEC allows code execution
   585  TEST_F(MMapTest, ProtExec) {
   586    uintptr_t addr;
   587    uint32_t (*func)(void);
   588  
   589    EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE,
   590                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   591                SyscallSucceeds());
   592  
   593    memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
   594  
   595  #if defined(__aarch64__)
   596    // We use this as a memory barrier for Arm64.
   597    ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_EXEC),
   598                SyscallSucceeds());
   599  #endif
   600  
   601    func = reinterpret_cast<uint32_t (*)(void)>(addr);
   602  
   603    EXPECT_EQ(42, func());
   604  }
   605  
   606  // No PROT_EXEC disallows code execution
   607  TEST_F(MMapTest, NoProtExecDeath) {
   608    SetupGvisorDeathTest();
   609  
   610    uintptr_t addr;
   611    uint32_t (*func)(void);
   612  
   613    EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   614                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   615                SyscallSucceeds());
   616  
   617    memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
   618  
   619    func = reinterpret_cast<uint32_t (*)(void)>(addr);
   620  
   621    EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), "");
   622  }
   623  
   624  TEST_F(MMapTest, NoExceedLimitData) {
   625    void* prevbrk;
   626    void* target_brk;
   627    struct rlimit setlim;
   628  
   629    prevbrk = sbrk(0);
   630    ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
   631    target_brk = reinterpret_cast<char*>(prevbrk) + 1;
   632  
   633    setlim.rlim_cur = RLIM_INFINITY;
   634    setlim.rlim_max = RLIM_INFINITY;
   635    ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
   636    EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0));
   637  }
   638  
   639  TEST_F(MMapTest, ExceedLimitData) {
   640    // To unit test this more precisely, we'd need access to the mm's start_brk
   641    // and end_brk, which we don't have direct access to :/
   642    void* prevbrk;
   643    void* target_brk;
   644    struct rlimit setlim;
   645  
   646    prevbrk = sbrk(0);
   647    ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
   648    target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
   649  
   650    setlim.rlim_cur = 0;
   651    setlim.rlim_max = RLIM_INFINITY;
   652    // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
   653    // Reset RLIMIT_DATA during teardown step.
   654    ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
   655    EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
   656    // Teardown step...
   657    setlim.rlim_cur = RLIM_INFINITY;
   658    ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
   659  }
   660  
   661  TEST_F(MMapTest, ExceedLimitDataPrlimit) {
   662    // To unit test this more precisely, we'd need access to the mm's start_brk
   663    // and end_brk, which we don't have direct access to :/
   664    void* prevbrk;
   665    void* target_brk;
   666    struct rlimit setlim;
   667  
   668    prevbrk = sbrk(0);
   669    ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
   670    target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
   671  
   672    setlim.rlim_cur = 0;
   673    setlim.rlim_max = RLIM_INFINITY;
   674    // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
   675    // Reset RLIMIT_DATA during teardown step.
   676    ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds());
   677    EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
   678    // Teardown step...
   679    setlim.rlim_cur = RLIM_INFINITY;
   680    ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
   681  }
   682  
   683  TEST_F(MMapTest, ExceedLimitDataPrlimitPID) {
   684    // To unit test this more precisely, we'd need access to the mm's start_brk
   685    // and end_brk, which we don't have direct access to :/
   686    void* prevbrk;
   687    void* target_brk;
   688    struct rlimit setlim;
   689  
   690    prevbrk = sbrk(0);
   691    ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
   692    target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
   693  
   694    setlim.rlim_cur = 0;
   695    setlim.rlim_max = RLIM_INFINITY;
   696    // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
   697    // Reset RLIMIT_DATA during teardown step.
   698    ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr),
   699                SyscallSucceeds());
   700    EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
   701    // Teardown step...
   702    setlim.rlim_cur = RLIM_INFINITY;
   703    ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
   704  }
   705  
   706  TEST_F(MMapTest, NoExceedLimitAS) {
   707    constexpr uint64_t kAllocBytes = 200 << 20;
   708    // Add some headroom to the AS limit in case of e.g. unexpected stack
   709    // expansion.
   710    constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20);
   711    static_assert(kAllocBytes < kExtraASBytes,
   712                  "test depends on allocation not exceeding AS limit");
   713  
   714    auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
   715    struct rlimit setlim;
   716    setlim.rlim_cur = vss + kExtraASBytes;
   717    setlim.rlim_max = RLIM_INFINITY;
   718    ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
   719    EXPECT_THAT(
   720        Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   721        SyscallSucceedsWithValue(Gt(0)));
   722  }
   723  
   724  TEST_F(MMapTest, ExceedLimitAS) {
   725    constexpr uint64_t kAllocBytes = 200 << 20;
   726    // Add some headroom to the AS limit in case of e.g. unexpected stack
   727    // expansion.
   728    constexpr uint64_t kExtraASBytes = 20 << 20;
   729    static_assert(kAllocBytes > kExtraASBytes,
   730                  "test depends on allocation exceeding AS limit");
   731  
   732    auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
   733    struct rlimit setlim;
   734    setlim.rlim_cur = vss + kExtraASBytes;
   735    setlim.rlim_max = RLIM_INFINITY;
   736    ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
   737    EXPECT_THAT(
   738        Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   739        SyscallFailsWithErrno(ENOMEM));
   740  }
   741  
   742  // Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory.
   743  TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) {
   744    uintptr_t addr;
   745    constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
   746  
   747    EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
   748                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
   749                SyscallSucceedsWithValue(Gt(0)));
   750  
   751    memset(reinterpret_cast<void*>(addr), 42, kPageSize);
   752  
   753    ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds());
   754    ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
   755                SyscallSucceeds());
   756  
   757    // The written data should still be there.
   758    EXPECT_EQ(
   759        0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
   760  }
   761  
   762  constexpr char kFileContents[] = "Hello World!";
   763  
   764  class MMapFileTest : public MMapTest {
   765   protected:
   766    FileDescriptor fd_;
   767    std::string filename_;
   768  
   769    // Open a file for read/write
   770    void SetUp() override {
   771      MMapTest::SetUp();
   772  
   773      filename_ = NewTempAbsPath();
   774      fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644));
   775  
   776      // Extend file so it can be written once mapped. Deliberately make the file
   777      // only half a page in size, so we can test what happens when we access the
   778      // second half.
   779      // Use ftruncate(2) once the sentry supports it.
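             // A rough sketch of what this setup could become once the sentry
             // supports ftruncate(2) (hypothetical replacement for the write
             // loop below, using the same half-page size):
             //
             //   ASSERT_THAT(ftruncate(fd_.get(), kPageSize / 2),
             //               SyscallSucceeds());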
   780      char zero = 0;
   781      size_t count = 0;
   782      do {
   783        const DisableSave ds;  // saving 2048 times is slow and useless.
    784        ASSERT_THAT(Write(&zero, 1), SyscallSucceedsWithValue(1));
   785      } while (++count < (kPageSize / 2));
   786      ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
   787    }
   788  
   789    // Close and delete file
   790    void TearDown() override {
   791      MMapTest::TearDown();
    792      fd_.reset();  // Make sure the file is closed before we unlink it.
   793      ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds());
   794    }
   795  
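           // Reads up to count bytes from fd_, retrying short reads; returns the
           // number of bytes read, or the failing read(2) return value.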
   796    ssize_t Read(char* buf, size_t count) {
   797      ssize_t len = 0;
   798      do {
   799        ssize_t ret = read(fd_.get(), buf, count);
   800        if (ret < 0) {
   801          return ret;
   802        } else if (ret == 0) {
   803          return len;
   804        }
   805  
   806        len += ret;
   807        buf += ret;
   808      } while (len < static_cast<ssize_t>(count));
   809  
   810      return len;
   811    }
   812  
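           // Writes count bytes from buf to fd_, retrying short writes; returns
           // the number of bytes written, or the failing write(2) return value.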
   813    ssize_t Write(const char* buf, size_t count) {
   814      ssize_t len = 0;
   815      do {
   816        ssize_t ret = write(fd_.get(), buf, count);
   817        if (ret < 0) {
   818          return ret;
   819        } else if (ret == 0) {
   820          return len;
   821        }
   822  
   823        len += ret;
   824        buf += ret;
   825      } while (len < static_cast<ssize_t>(count));
   826  
   827      return len;
   828    }
   829  };
   830  
   831  class MMapFileParamTest
   832      : public MMapFileTest,
   833        public ::testing::WithParamInterface<std::tuple<int, int>> {
   834   protected:
   835    int prot() const { return std::get<0>(GetParam()); }
   836  
   837    int flags() const { return std::get<1>(GetParam()); }
   838  };
   839  
   840  // MAP_POPULATE allowed.
   841  // There isn't a good way to verify it actually did anything.
   842  TEST_P(MMapFileParamTest, MapPopulate) {
   843    ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
   844                SyscallSucceeds());
   845  }
   846  
   847  // MAP_POPULATE on a short file.
   848  TEST_P(MMapFileParamTest, MapPopulateShort) {
   849    ASSERT_THAT(
   850        Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
   851        SyscallSucceeds());
   852  }
   853  
   854  // Read contents from mapped file.
   855  TEST_F(MMapFileTest, Read) {
   856    size_t len = strlen(kFileContents);
   857    ASSERT_EQ(len, Write(kFileContents, len));
   858  
   859    uintptr_t addr;
   860    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0),
   861                SyscallSucceeds());
   862  
   863    EXPECT_THAT(reinterpret_cast<char*>(addr),
   864                EqualsMemory(std::string(kFileContents)));
   865  }
   866  
   867  // Map at an offset.
   868  TEST_F(MMapFileTest, MapOffset) {
   869    ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds());
   870  
   871    size_t len = strlen(kFileContents);
   872    ASSERT_EQ(len, Write(kFileContents, len));
   873  
   874    uintptr_t addr;
   875    ASSERT_THAT(
   876        addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize),
   877        SyscallSucceeds());
   878  
   879    EXPECT_THAT(reinterpret_cast<char*>(addr),
   880                EqualsMemory(std::string(kFileContents)));
   881  }
   882  
   883  TEST_F(MMapFileTest, MapOffsetBeyondEnd) {
   884    SetupGvisorDeathTest();
   885  
   886    uintptr_t addr;
   887    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
   888                           fd_.get(), 10 * kPageSize),
   889                SyscallSucceeds());
   890  
   891    // Touching the memory causes SIGBUS.
   892    size_t len = strlen(kFileContents);
   893    EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
   894                          reinterpret_cast<volatile char*>(addr)),
   895                ::testing::KilledBySignal(SIGBUS), "");
   896  }
   897  
   898  // Verify mmap fails when sum of length and offset overflows.
   899  TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) {
   900    const size_t length = static_cast<size_t>(-kPageSize);
   901    const off_t offset = kPageSize;
   902    ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset),
   903                SyscallFailsWithErrno(ENOMEM));
   904  }
   905  
   906  // MAP_PRIVATE PROT_WRITE is allowed on read-only FDs.
   907  TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) {
   908    const FileDescriptor fd =
   909        ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
   910  
   911    uintptr_t addr;
   912    EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
   913                           fd.get(), 0),
   914                SyscallSucceeds());
   915  
   916    // Touch the page to ensure the kernel didn't lie about writability.
   917    size_t len = strlen(kFileContents);
   918    std::copy(kFileContents, kFileContents + len,
   919              reinterpret_cast<volatile char*>(addr));
   920  }
   921  
   922  // MAP_SHARED PROT_WRITE not allowed on read-only FDs.
   923  TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) {
   924    const FileDescriptor fd =
   925        ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
   926  
   927    uintptr_t addr;
   928    EXPECT_THAT(
   929        addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0),
   930        SyscallFailsWithErrno(EACCES));
   931  }
   932  
   933  // Mmap not allowed on O_PATH FDs.
   934  TEST_F(MMapFileTest, MmapFileWithOpath) {
   935    SKIP_IF(IsRunningWithVFS1());
   936    const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
   937    const FileDescriptor fd =
   938        ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_PATH));
   939  
   940    uintptr_t addr;
   941    EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0),
   942                SyscallFailsWithErrno(EBADF));
   943  }
   944  
   945  // The FD must be readable.
   946  TEST_P(MMapFileParamTest, WriteOnlyFd) {
   947    const FileDescriptor fd =
   948        ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));
   949  
   950    uintptr_t addr;
   951    EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0),
   952                SyscallFailsWithErrno(EACCES));
   953  }
   954  
   955  // Overwriting the contents of a file mapped MAP_SHARED PROT_READ
   956  // should cause the new data to be reflected in the mapping.
   957  TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) {
   958    // Start from scratch.
   959    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
   960  
   961    // Expand the file to two pages and dirty them.
   962    std::string bufA(kPageSize, 'a');
   963    ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
   964                SyscallSucceedsWithValue(bufA.size()));
   965    std::string bufB(kPageSize, 'b');
   966    ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
   967                SyscallSucceedsWithValue(bufB.size()));
   968  
   969    // Map the page.
   970    uintptr_t addr;
   971    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
   972                SyscallSucceeds());
   973  
   974    // Check that the mapping contains the right file data.
   975    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
   976    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
   977                        kPageSize));
   978  
   979    // Start at the beginning of the file.
   980    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
   981  
   982    // Swap the write pattern.
   983    ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
   984                SyscallSucceedsWithValue(bufB.size()));
   985    ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
   986                SyscallSucceedsWithValue(bufA.size()));
   987  
   988    // Check that the mapping got updated.
   989    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize));
   990    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(),
   991                        kPageSize));
   992  }
   993  
   994  // Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected
   995  // in the mapping.
   996  TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) {
   997    // Start from scratch.
   998    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
   999  
  1000    // Expand the file to two pages and dirty them.
  1001    std::string bufA(kPageSize, 'a');
  1002    ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
  1003                SyscallSucceedsWithValue(bufA.size()));
  1004    std::string bufB(kPageSize, 'b');
  1005    ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
  1006                SyscallSucceedsWithValue(bufB.size()));
  1007  
  1008    // Map the page.
  1009    uintptr_t addr;
  1010    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1011                SyscallSucceeds());
  1012  
  1013    // Check that the mapping contains the right file data.
  1014    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
  1015    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
  1016                        kPageSize));
  1017  
  1018    // Start at the beginning of the file.
  1019    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
  1020  
  1021    // Do a partial overwrite, spanning both pages.
  1022    std::string bufC(kPageSize + (kPageSize / 2), 'c');
  1023    ASSERT_THAT(Write(bufC.c_str(), bufC.size()),
  1024                SyscallSucceedsWithValue(bufC.size()));
  1025  
  1026    // Check that the mapping got updated.
  1027    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(),
  1028                        kPageSize + (kPageSize / 2)));
  1029    EXPECT_EQ(0,
  1030              memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)),
  1031                     bufB.c_str(), kPageSize / 2));
  1032  }
  1033  
  1034  // Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the
  1035  // mapping and the file.
  1036  TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) {
  1037    // Start from scratch.
  1038    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1039  
  1040    // Expand the file to two full pages and dirty it.
  1041    std::string bufA(2 * kPageSize, 'a');
  1042    ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
  1043                SyscallSucceedsWithValue(bufA.size()));
  1044  
  1045    // Map only the first page.
  1046    uintptr_t addr;
  1047    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1048                SyscallSucceeds());
  1049  
  1050    // Prepare to overwrite the file contents.
  1051    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
  1052  
  1053    // Overwrite everything, beyond the mapped portion.
  1054    std::string bufB(2 * kPageSize, 'b');
  1055    ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
  1056                SyscallSucceedsWithValue(bufB.size()));
  1057  
  1058    // What the mapped portion should now look like.
  1059    std::string bufMapped(kPageSize, 'b');
  1060  
  1061    // Expect that the mapped portion is consistent.
  1062    EXPECT_EQ(
  1063        0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize));
  1064  
  1065    // Prepare to read the entire file contents.
  1066    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
  1067  
  1068    // Expect that the file was fully updated.
  1069    std::vector<char> bufFile(2 * kPageSize);
  1070    ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
  1071                SyscallSucceedsWithValue(bufFile.size()));
  1072    // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a
   1073    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
   1074    // string, possibly overrunning the buffer.
  1075    EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB));
  1076  }
  1077  
  1078  // Write data to mapped file.
  1079  TEST_F(MMapFileTest, WriteShared) {
  1080    uintptr_t addr;
  1081    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1082                           fd_.get(), 0),
  1083                SyscallSucceeds());
  1084  
  1085    size_t len = strlen(kFileContents);
  1086    memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
  1087  
  1088    // The file may not actually be updated until munmap is called.
  1089    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1090  
  1091    std::vector<char> buf(len);
  1092    ASSERT_THAT(Read(buf.data(), buf.size()),
  1093                SyscallSucceedsWithValue(buf.size()));
  1094    // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  1095    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
   1096    // string, possibly overrunning the buffer.
  1097    EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
  1098                EqualsMemory(std::string(kFileContents)));
  1099  }
  1100  
  1101  // Write data to portion of mapped page beyond the end of the file.
  1102  // These writes are not reflected in the file.
  1103  TEST_F(MMapFileTest, WriteSharedBeyondEnd) {
  1104    // The file is only half of a page. We map an entire page. Writes to the
  1105    // end of the mapping must not be reflected in the file.
  1106    uintptr_t addr;
  1107    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1108                           fd_.get(), 0),
  1109                SyscallSucceeds());
  1110  
  1111    // First half; this is reflected in the file.
  1112    std::string first(kPageSize / 2, 'A');
  1113    memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
  1114  
  1115    // Second half; this is not reflected in the file.
  1116    std::string second(kPageSize / 2, 'B');
  1117    memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
  1118           second.size());
  1119  
  1120    // The file may not actually be updated until munmap is called.
  1121    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1122  
  1123    // Big enough to fit the entire page, if the writes are mistakenly written to
  1124    // the file.
  1125    std::vector<char> buf(kPageSize);
  1126  
  1127    // Only the first half is in the file.
  1128    ASSERT_THAT(Read(buf.data(), buf.size()),
  1129                SyscallSucceedsWithValue(first.size()));
  1130    // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  1131    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
   1132    // string, possibly overrunning the buffer.
  1134    EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
  1135  }
  1136  
  1137  // The portion of a mapped page that becomes part of the file after a truncate
  1138  // is reflected in the file.
  1139  TEST_F(MMapFileTest, WriteSharedTruncateUp) {
  1140    // The file is only half of a page. We map an entire page. Writes to the
  1141    // end of the mapping must not be reflected in the file.
  1142    uintptr_t addr;
  1143    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1144                           fd_.get(), 0),
  1145                SyscallSucceeds());
  1146  
  1147    // First half; this is reflected in the file.
  1148    std::string first(kPageSize / 2, 'A');
  1149    memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
  1150  
  1151    // Second half; this is not reflected in the file now (see
  1152    // WriteSharedBeyondEnd), but will be after the truncate.
  1153    std::string second(kPageSize / 2, 'B');
  1154    memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
  1155           second.size());
  1156  
  1157    // Extend the file to a full page. The second half of the page will be
  1158    // reflected in the file.
  1159    EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
  1160  
  1161    // The file may not actually be updated until munmap is called.
  1162    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1163  
  1164    // The whole page is in the file.
  1165    std::vector<char> buf(kPageSize);
  1166    ASSERT_THAT(Read(buf.data(), buf.size()),
  1167                SyscallSucceedsWithValue(buf.size()));
  1168    // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  1169    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
   1170    // string, possibly overrunning the buffer.
  1171    EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
  1172    EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2),
  1173                EqualsMemory(second));
  1174  }
  1175  
  1176  TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) {
  1177    // Start from scratch.
  1178    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1179  
  1180    // Expand the file to a full page and dirty it.
  1181    std::string buf(kPageSize, 'a');
  1182    ASSERT_THAT(Write(buf.c_str(), buf.size()),
  1183                SyscallSucceedsWithValue(buf.size()));
  1184  
  1185    // Map the page.
  1186    uintptr_t addr;
  1187    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1188                SyscallSucceeds());
  1189  
  1190    // Check that the memory contains the file data.
  1191    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
  1192  
  1193    // Truncate down, then up.
  1194    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1195    EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
  1196  
  1197    // Check that the memory was zeroed.
  1198    std::string zeroed(kPageSize, '\0');
  1199    EXPECT_EQ(0,
  1200              memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
  1201  
  1202    // The file may not actually be updated until msync is called.
  1203    ASSERT_THAT(Msync(), SyscallSucceeds());
  1204  
  1205    // Prepare to read the entire file contents.
  1206    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
  1207  
  1208    // Expect that the file is fully updated.
  1209    std::vector<char> bufFile(kPageSize);
  1210    ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
  1211                SyscallSucceedsWithValue(bufFile.size()));
  1212    EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize));
  1213  }
  1214  
  1215  TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) {
  1216    // The file is only half of a page. We map an entire page. Writes to the
  1217    // end of the mapping must not be reflected in the file.
  1218    uintptr_t addr;
  1219    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1220                           fd_.get(), 0),
  1221                SyscallSucceeds());
  1222  
  1223    // First half; this will be deleted by truncate(0).
  1224    std::string first(kPageSize / 2, 'A');
  1225    memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
  1226  
  1227    // Truncate down, then up.
  1228    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1229    EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
  1230  
  1231    // The whole page is zeroed in memory.
  1232    std::string zeroed(kPageSize, '\0');
  1233    EXPECT_EQ(0,
  1234              memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
  1235  
  1236    // The file may not actually be updated until munmap is called.
  1237    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1238  
  1239    // The whole file is also zeroed.
  1240    std::vector<char> buf(kPageSize);
  1241    ASSERT_THAT(Read(buf.data(), buf.size()),
  1242                SyscallSucceedsWithValue(buf.size()));
  1243    // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  1244    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
   1245    // string, possibly overrunning the buffer.
  1246    EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed));
  1247  }
  1248  
  1249  TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) {
  1250    SetupGvisorDeathTest();
  1251  
  1252    // Start from scratch.
  1253    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1254  
  1255    // Expand the file to a full page and dirty it.
  1256    std::string buf(kPageSize, 'a');
  1257    ASSERT_THAT(Write(buf.c_str(), buf.size()),
  1258                SyscallSucceedsWithValue(buf.size()));
  1259  
  1260    // Map the page.
  1261    uintptr_t addr;
  1262    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1263                SyscallSucceeds());
  1264  
  1265    // Check that the mapping contains the file data.
  1266    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
  1267  
  1268    // Truncate down.
  1269    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1270  
  1271    // Accessing the truncated region should cause a SIGBUS.
  1272    std::vector<char> in(kPageSize);
  1273    EXPECT_EXIT(
  1274        std::copy(reinterpret_cast<volatile char*>(addr),
  1275                  reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()),
  1276        ::testing::KilledBySignal(SIGBUS), "");
  1277  }
  1278  
  1279  TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) {
  1280    SetupGvisorDeathTest();
  1281  
  1282    uintptr_t addr;
  1283    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1284                           fd_.get(), 0),
  1285                SyscallSucceeds());
  1286  
  1287    // Touch the memory to be sure it really is mapped.
  1288    size_t len = strlen(kFileContents);
  1289    memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
  1290  
  1291    // Truncate down.
  1292    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1293  
  1294    // Accessing the truncated file should cause a SIGBUS.
  1295    EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
  1296                          reinterpret_cast<volatile char*>(addr)),
  1297                ::testing::KilledBySignal(SIGBUS), "");
  1298  }
  1299  
  1300  TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) {
  1301    // Start from scratch.
  1302    EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  1303  
  1304    // Dirty the file.
  1305    std::string buf(kPageSize, 'a');
  1306    ASSERT_THAT(Write(buf.c_str(), buf.size()),
  1307                SyscallSucceedsWithValue(buf.size()));
  1308  
  1309    // Map a page.
  1310    uintptr_t addr;
  1311    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1312                SyscallSucceeds());
  1313  
  1314    // Truncate to half of the page.
  1315    EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
  1316  
  1317    // First half of the page untouched.
  1318    EXPECT_EQ(0,
  1319              memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2));
  1320  
  1321    // Second half is zeroed.
  1322    std::string zeroed(kPageSize / 2, '\0');
  1323    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
  1324                        zeroed.c_str(), kPageSize / 2));
  1325  }
  1326  
  1327  // Page can still be accessed and contents are intact after truncating a partial
  1328  // page.
  1329  TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) {
  1330    // Expand the file to a full page.
  1331    EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
  1332  
  1333    uintptr_t addr;
  1334    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1335                           fd_.get(), 0),
  1336                SyscallSucceeds());
  1337  
  1338    // Fill the entire page.
  1339    std::string contents(kPageSize, 'A');
  1340    memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size());
  1341  
  1342    // Truncate half of the page.
  1343    EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
  1344  
  1345    // First half of the page untouched.
  1346    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(),
  1347                        kPageSize / 2));
  1348  
  1349    // Second half zeroed.
  1350    std::string zeroed(kPageSize / 2, '\0');
  1351    EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
  1352                        zeroed.c_str(), kPageSize / 2));
  1353  }
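        // Taken together, the two partial-page truncation tests above demonstrate
        // the expected semantics: when the new EOF falls inside a mapped page, the
        // page itself remains mapped and accessible; bytes before the new EOF keep
        // their contents, while bytes from the new EOF to the end of the page read
        // back as zeroes.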
  1354  
  1355  // MAP_PRIVATE writes are not carried through to the underlying file.
  1356  TEST_F(MMapFileTest, WritePrivate) {
  1357    uintptr_t addr;
  1358    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
  1359                           fd_.get(), 0),
  1360                SyscallSucceeds());
  1361  
  1362    size_t len = strlen(kFileContents);
  1363    memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
  1364  
  1365    // The file should not be updated, but if it mistakenly is, it may not be
  1366    // until after munmap is called.
  1367    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1368  
  1369    std::vector<char> buf(len);
  1370    ASSERT_THAT(Read(buf.data(), buf.size()),
  1371                SyscallSucceedsWithValue(buf.size()));
  1372    // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  1373    // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  1374    // string, possibly overrunning the buffer.
  1375    EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
  1376                EqualsMemory(std::string(len, '\0')));
  1377  }
  1378  
  1379  // SIGBUS raised when reading or writing past end of a mapped file.
  1380  TEST_P(MMapFileParamTest, SigBusDeath) {
  1381    SetupGvisorDeathTest();
  1382  
  1383    uintptr_t addr;
  1384    ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
  1385                SyscallSucceeds());
  1386  
  1387    auto* start = reinterpret_cast<volatile char*>(addr + kPageSize);
  1388  
  1389    // MMapFileTest makes a file kPageSize/2 long. The entire first page should be
  1390    // accessible, but anything beyond it should not be.
  1391    if (prot() & PROT_WRITE) {
  1392      // Write beyond first page.
  1393      size_t len = strlen(kFileContents);
  1394      EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start),
  1395                  ::testing::KilledBySignal(SIGBUS), "");
  1396    } else {
  1397      // Read beyond first page.
  1398      std::vector<char> in(kPageSize);
  1399      EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()),
  1400                  ::testing::KilledBySignal(SIGBUS), "");
  1401    }
  1402  }
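        // A rough sketch of the layout exercised by SigBusDeath and the two NoSigBus
        // tests below (the fixture's file is kPageSize/2 bytes long and the mapping
        // spans two pages):
        //
        //   offset:   0            kPageSize/2       kPageSize         2*kPageSize
        //   file:     [--- data ---]  EOF
        //   mapping:  [---- page 0: accessible ------][--- page 1: SIGBUS --------]
        //
        // Accesses anywhere in the page containing EOF succeed (the tail past EOF is
        // zero-filled), while accesses to pages lying entirely beyond EOF raise
        // SIGBUS.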
  1403  
  1404  // Tests that SIGBUS is not raised when reading or writing to a file-mapped
  1405  // page before EOF, even if part of the mapping extends beyond EOF.
  1406  //
  1407  // See b/27877699.
  1408  TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) {
  1409    uintptr_t addr;
  1410    ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
  1411                SyscallSucceeds());
  1412  
  1413    // The test passes if this survives.
  1414    auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
  1415    size_t len = strlen(kFileContents);
  1416    if (prot() & PROT_WRITE) {
  1417      std::copy(kFileContents, kFileContents + len, start);
  1418    } else {
  1419      std::vector<char> in(len);
  1420      std::copy(start, start + len, in.data());
  1421    }
  1422  }
  1423  
  1424  // Tests that SIGBUS is not raised when reading or writing from a file-mapped
  1425  // page containing EOF, *after* the EOF.
  1426  TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) {
  1427    uintptr_t addr;
  1428    ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
  1429                SyscallSucceeds());
  1430  
  1431    // The test passes if this survives. (Technically addr+kPageSize/2 is already
  1432    // beyond EOF, but +1 to check for fencepost errors.)
  1433    auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
  1434    size_t len = strlen(kFileContents);
  1435    if (prot() & PROT_WRITE) {
  1436      std::copy(kFileContents, kFileContents + len, start);
  1437    } else {
  1438      std::vector<char> in(len);
  1439      std::copy(start, start + len, in.data());
  1440    }
  1441  }
  1442  
  1443  // Tests that reading from writable shared file-mapped pages succeeds.
  1444  //
  1445  // On most platforms this is trivial, but when the file is mapped via the sentry
  1446  // page cache (which does not yet support writing to shared mappings), a bug
  1447  // caused reads to fail unnecessarily on such mappings. See b/28913513.
  1448  TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) {
  1449    uintptr_t addr;
  1450    size_t len = strlen(kFileContents);
  1451  
  1452    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1453                           fd_.get(), 0),
  1454                SyscallSucceeds());
  1455  
  1456    std::vector<char> buf(kPageSize);
  1457    // The test passes if this survives.
  1458    std::copy(reinterpret_cast<volatile char*>(addr),
  1459              reinterpret_cast<volatile char*>(addr) + len, buf.data());
  1460  }
  1461  
  1462  // Tests that EFAULT is returned when invoking a syscall that requires the OS to
  1463  // read past end of file (resulting in a fault in sentry context in the gVisor
  1464  // case). See b/28913513.
  1465  TEST_F(MMapFileTest, InternalSigBus) {
  1466    uintptr_t addr;
  1467    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
  1468                           fd_.get(), 0),
  1469                SyscallSucceeds());
  1470  
  1471    // This depends on the fact that gVisor implements pipes internally.
  1472    int pipefd[2];
  1473    ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
  1474    EXPECT_THAT(
  1475        write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize),
  1476        SyscallFailsWithErrno(EFAULT));
  1477  
  1478    EXPECT_THAT(close(pipefd[0]), SyscallSucceeds());
  1479    EXPECT_THAT(close(pipefd[1]), SyscallSucceeds());
  1480  }
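        // The key difference from the SIGBUS death tests above: here the faulting
        // access happens while the kernel (or, under gVisor, the sentry) copies from
        // the mapping on behalf of write(2), not while the test itself dereferences
        // the address. A fault taken in that context is reported as an EFAULT error
        // from the syscall rather than as a SIGBUS delivered to the task.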
  1481  
  1482  // Like InternalSigBus, but tests the WriteZerosAt path by reading from
  1483  // /dev/zero into a shared mapping (so that the SIGBUS isn't caught during
  1484  // copy-on-write breaking).
  1485  TEST_F(MMapFileTest, InternalSigBusZeroing) {
  1486    uintptr_t addr;
  1487    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1488                           fd_.get(), 0),
  1489                SyscallSucceeds());
  1490  
  1491    const FileDescriptor dev_zero =
  1492        ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
  1493    EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize),
  1494                     kPageSize),
  1495                SyscallFailsWithErrno(EFAULT));
  1496  }
  1497  
  1498  // Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not
  1499  // induce a sentry panic (due to "rounding up" to 0).
  1500  TEST_F(MMapTest, HugeLength) {
  1501    EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE,
  1502                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
  1503                SyscallFailsWithErrno(ENOMEM));
  1504  }
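        // Why such a length is dangerous, as a worked example (illustrative only,
        // assuming 4 KiB pages): the requested length is 2^64 - 4096 + 1, and
        // rounding it up to the next page boundary gives 2^64, which wraps to 0 in
        // 64-bit arithmetic. An implementation that rounds without checking for that
        // overflow would see a zero-length request; both Linux and gVisor are
        // expected to reject the request with ENOMEM instead.
        constexpr uint64_t kIllustrativeHugeLen = static_cast<uint64_t>(-4096 + 1);
        static_assert(((kIllustrativeHugeLen + 4095) & ~uint64_t{4095}) == 0,
                      "page-rounding such a length wraps to zero");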
  1505  
  1506  // Tests for a specific gVisor MM caching bug.
  1507  TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) {
  1508    auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  1509    auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
  1510    auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
  1511  
  1512    // Get a two-page private mapping and fill it with 1s.
  1513    uintptr_t addr;
  1514    ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
  1515                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
  1516                SyscallSucceeds());
  1517    memset(addr_, 1, 2 * kPageSize);
  1518    MaybeSave();
  1519  
  1520    // Fork to make the mapping copy-on-write.
  1521    pid_t const pid = fork();
  1522    if (pid == 0) {
  1523      // The child process waits for the parent to SIGKILL it.
  1524      while (true) {
  1525        pause();
  1526      }
  1527    }
  1528    ASSERT_THAT(pid, SyscallSucceeds());
  1529    auto cleanup_child = Cleanup([&] {
  1530      EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
  1531      int status;
  1532      EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
  1533    });
  1534  
  1535    // Induce a read-only Access of the first page of the mapping, which will not
  1536    // cause a copy. The usermem.Segment should be cached.
  1537    ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
  1538                SyscallSucceedsWithValue(kPageSize));
  1539  
  1540    // Induce a writable Access of both pages of the mapping. This should
  1541    // invalidate the cached Segment.
  1542    ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0),
  1543                SyscallSucceedsWithValue(2 * kPageSize));
  1544  
  1545    // Induce a read-only Access of the first page of the mapping again. It should
  1546    // read the 0s that were stored in the mapping by the read from /dev/zero. If
  1547    // the read failed to invalidate the cached Segment, it will instead read the
  1548    // 1s in the stale page.
  1549    ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
  1550                SyscallSucceedsWithValue(kPageSize));
  1551    std::vector<char> buf(kPageSize);
  1552    ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0),
  1553                SyscallSucceedsWithValue(kPageSize));
  1554    for (size_t i = 0; i < kPageSize; i++) {
  1555      ASSERT_EQ(0, buf[i]) << "at offset " << i;
  1556    }
  1557  }
  1558  
  1559  TEST_F(MMapTest, NoReserve) {
  1560    const size_t kSize = 10 * 1 << 20;  // 10M
  1561    uintptr_t addr;
  1562    ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE,
  1563                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0),
  1564                SyscallSucceeds());
  1565    EXPECT_GT(addr, 0);
  1566  
  1567    // Check that every page can be read and written. Technically, writing to
  1568    // memory could SIGSEGV if no more memory is available. In gVisor this never
  1569    // happens because NORESERVE is ignored. In Linux it is possible to fail,
  1570    // but the allocation is small enough that it is highly likely to
  1571    // succeed.
  1572    for (size_t j = 0; j < kSize; j += kPageSize) {
  1573      EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]);
  1574      reinterpret_cast<char*>(addr)[j] = j;
  1575    }
  1576  }
  1577  
  1578  // Map more than the gVisor page-cache map unit (64k) and ensure that what was
  1579  // written through the mapping is consistent with a subsequent read of the file.
  1580  TEST_F(MMapFileTest, Bug38498194) {
  1581    // Choose a sufficiently large map unit.
  1582    constexpr int kSize = 4 * 1024 * 1024;
  1583    EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds());
  1584  
  1585    // Map a large enough region so that multiple internal segments
  1586    // are created to back the mapping.
  1587    uintptr_t addr;
  1588    ASSERT_THAT(
  1589        addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0),
  1590        SyscallSucceeds());
  1591  
  1592    std::vector<char> expect(kSize, 'a');
  1593    std::copy(expect.data(), expect.data() + expect.size(),
  1594              reinterpret_cast<volatile char*>(addr));
  1595  
  1596    // Trigger writeback for gVisor. In Linux, pages stay cached until the
  1597    // kernel can no longer hold onto them.
  1598    ASSERT_THAT(Unmap(), SyscallSucceeds());
  1599  
  1600    std::vector<char> buf(kSize);
  1601    ASSERT_THAT(Read(buf.data(), buf.size()),
  1602                SyscallSucceedsWithValue(buf.size()));
  1603    EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size());
  1604  }
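        // Note that msync(MS_SYNC) on the still-live mapping should also force the
        // dirty pages out to the file; unmapping is used above because, as the
        // comment notes, that is what triggers writeback under gVisor.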
  1605  
  1606  // Tests that reading from a file to a memory mapping of the same file does not
  1607  // deadlock. See b/34813270.
  1608  TEST_F(MMapFileTest, SelfRead) {
  1609    uintptr_t addr;
  1610    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
  1611                           fd_.get(), 0),
  1612                SyscallSucceeds());
  1613    EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2),
  1614                SyscallSucceedsWithValue(kPageSize / 2));
  1615    // The resulting file contents are poorly-specified and irrelevant.
  1616  }
  1617  
  1618  // Tests that writing to a file from a memory mapping of the same file does not
  1619  // deadlock. Regression test for b/34813270.
  1620  TEST_F(MMapFileTest, SelfWrite) {
  1621    uintptr_t addr;
  1622    ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
  1623                SyscallSucceeds());
  1624    EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2),
  1625                SyscallSucceedsWithValue(kPageSize / 2));
  1626    // The resulting file contents are poorly-specified and irrelevant.
  1627  }
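        // In both self-read and self-write, the I/O path has to fault in pages that
        // are backed by the very file being read or written. A naive implementation
        // that holds the file's locks across the user-memory access could deadlock
        // against its own fault handler, which is the failure mode these two
        // regression tests guard against.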
  1628  
  1629  TEST(MMapDeathTest, TruncateAfterCOWBreak) {
  1630    SetupGvisorDeathTest();
  1631  
  1632    // Create and map a single-page file.
  1633    auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  1634    auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR));
  1635    ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds());
  1636    auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
  1637        nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));
  1638  
  1639    // Write to this mapping, causing the page to be copied for write.
  1640    memset(mapping.ptr(), 'a', mapping.len());
  1641    MaybeSave();  // Trigger a co-operative save cycle.
  1642  
  1643    // Truncate the file and expect it to invalidate the copied page.
  1644    ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds());
  1645    EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()),
  1646                ::testing::KilledBySignal(SIGBUS), "");
  1647  }
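        // In other words, shrinking the file is expected to discard even the
        // anonymous copy produced by the COW break, so the subsequent access faults
        // on a file offset past the new EOF and raises SIGBUS.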
  1648  
  1649  // Regression test for #147.
  1650  TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) {
  1651    std::string filename = NewTempAbsPath();
  1652  
  1653    // We have to create the file O_RDONLY to reproduce the bug because
  1654    // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing
  1655    // the cached "write-only" FD to be read/write and therefore usable by mmap().
  1656    auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE(
  1657        Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666));
  1658  
  1659    // Get a write-only FD for the same file, which should be ignored by mmap()
  1660    // (but isn't in #147).
  1661    auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY));
  1662    ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds());
  1663  
  1664    auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
  1665        Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0));
  1666    std::vector<char> buf(kPageSize);
  1667    // The test passes if this survives.
  1668    std::copy(static_cast<char*>(mapping.ptr()),
  1669              static_cast<char*>(mapping.endptr()), buf.data());
  1670  }
  1671  
  1672  // Conditional on MAP_32BIT.
  1673  // This flag is supported only on x86-64, for 64-bit programs.
  1674  #ifdef __x86_64__
  1675  
  1676  TEST(MMapNoFixtureTest, Map32Bit) {
  1677    auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
  1678        MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT));
  1679    EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32);
  1680    EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32);
  1681  }
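        // Checking endaddr as well as addr guards against a mapping that merely
        // starts below 2^32 but spills past it; the whole mapping must fit in the
        // low 4 GiB.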
  1682  
  1683  #endif  // defined(__x86_64__)
  1684  
  1685  INSTANTIATE_TEST_SUITE_P(
  1686      ReadWriteSharedPrivate, MMapFileParamTest,
  1687      ::testing::Combine(::testing::ValuesIn({
  1688                             PROT_READ,
  1689                             PROT_WRITE,
  1690                             PROT_READ | PROT_WRITE,
  1691                         }),
  1692                         ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE})));
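        // The instantiation above runs every MMapFileParamTest case under all six
        // combinations of protection (PROT_READ, PROT_WRITE, PROT_READ | PROT_WRITE)
        // and mapping type (MAP_SHARED, MAP_PRIVATE).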
  1693  
  1694  }  // namespace
  1695  
  1696  }  // namespace testing
  1697  }  // namespace gvisor