// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <errno.h>
#include <fcntl.h>
#include <linux/magic.h>
#include <linux/unistd.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/statfs.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <limits>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_split.h"
#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
#include "test/util/memory_util.h"
#include "test/util/multiprocess_util.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"

using ::testing::AnyOf;
using ::testing::Eq;
using ::testing::Gt;

namespace gvisor {
namespace testing {

namespace {

// Returns this process's virtual memory size in bytes, computed from the
// first field ("size", in pages) of /proc/self/statm.
PosixErrorOr<int64_t> VirtualMemorySize() {
  ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm"));
  std::vector<std::string> parts = absl::StrSplit(contents, ' ');
  if (parts.empty()) {
    return PosixError(EINVAL, "Unable to parse /proc/self/statm");
  }
  ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0]));
  return pages * getpagesize();
}

// Test fixture that tracks at most one mmap(2) mapping at a time and
// automatically unmaps it on teardown.
class MMapTest : public ::testing::Test {
 protected:
  // Unmap mapping, if one was made.
  void TearDown() override {
    if (addr_) {
      EXPECT_THAT(Unmap(), SyscallSucceeds());
    }
  }

  // Remembers mapping, so it can be automatically unmapped.
  // Returns the mapped address, or MAP_FAILED (cast to uintptr_t) on error.
  uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd,
                off_t offset) {
    void* ret =
        mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset);

    if (ret != MAP_FAILED) {
      addr_ = ret;
      length_ = length;
    }

    return reinterpret_cast<uintptr_t>(ret);
  }

  // Unmap previous mapping. Returns -1 if no mapping was recorded.
  int Unmap() {
    if (!addr_) {
      return -1;
    }

    int ret = MunmapSafe(addr_, length_);

    addr_ = nullptr;
    length_ = 0;

    return ret;
  }

  // Msync the mapping.
  int Msync() { return msync(addr_, length_, MS_SYNC); }

  // Mlock the mapping.
  int Mlock() { return mlock(addr_, length_); }

  // Munlock the mapping.
  int Munlock() { return munlock(addr_, length_); }

  // mprotect the given range; addr need not be the tracked mapping.
  int Protect(uintptr_t addr, size_t length, int prot) {
    return mprotect(reinterpret_cast<void*>(addr), length, prot);
  }

  void* addr_ = nullptr;    // Tracked mapping address, nullptr if none.
  size_t length_ = 0;       // Tracked mapping length in bytes.
};

// Matches if arg contains the same contents as string str.
MATCHER_P(EqualsMemory, str, "") {
  if (0 == memcmp(arg, str.c_str(), str.size())) {
    return true;
  }

  *result_listener << "Memory did not match. Got:\n"
                   << absl::BytesToHexString(
                          std::string(static_cast<char*>(arg), str.size()))
                   << "Want:\n"
                   << absl::BytesToHexString(str);
  return false;
}

// We can't map pipes, but for different reasons.
TEST_F(MMapTest, MapPipe) {
  int fds[2];
  ASSERT_THAT(pipe(fds), SyscallSucceeds());
  // Read end: mapping a pipe is unsupported -> ENODEV.
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0),
              SyscallFailsWithErrno(ENODEV));
  // Write end is not open for reading -> EACCES.
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0),
              SyscallFailsWithErrno(EACCES));
  ASSERT_THAT(close(fds[0]), SyscallSucceeds());
  ASSERT_THAT(close(fds[1]), SyscallSucceeds());
}

// It's very common to mmap /dev/zero because anonymous mappings aren't part
// of POSIX although they are widely supported. So a zero initialized memory
// region would actually come from a "file backed" /dev/zero mapping.
TEST_F(MMapTest, MapDevZeroShared) {
  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_SHARED.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a RW SHARED mapping of /dev/zero.
  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());
}

TEST_F(MMapTest, MapDevZeroPrivate) {
  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_PRIVATE.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a RW PRIVATE mapping of /dev/zero.
  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
      SyscallSucceeds());
}

TEST_F(MMapTest, MapDevZeroNoPersistence) {
  // This test will verify that two independent mappings of /dev/zero do not
  // appear to reference the same "backed file."

  const FileDescriptor dev_zero1 =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
  const FileDescriptor dev_zero2 =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0),
      SyscallSucceeds());

  // Create a second mapping via the second /dev/zero fd.
  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                        dev_zero2.get(), 0);
  ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());

  // Always unmap.
  auto cleanup_psec_map = Cleanup(
      [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });

  // Verify that we have independently addressed pages.
  ASSERT_NE(psec_map, addr_);

  std::string buf_zero(kPageSize, 0x00);
  std::string buf_ones(kPageSize, 0xFF);

  // Verify the first is actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Let's fill in the first mapping with 0xFF.
  memcpy(addr_, buf_ones.data(), kPageSize);

  // Verify that the memcpy actually stuck in the page.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));

  // Verify that it didn't affect the second page which should be all zeros.
  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
}

TEST_F(MMapTest, MapDevZeroSharedMultiplePages) {
  // This will test that we're able to map /dev/zero over multiple pages.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a multi-page RW mapping of /dev/zero.
  // NOTE: despite the test name this mapping is MAP_PRIVATE.
  ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                  dev_zero.get(), 0),
              SyscallSucceeds());

  std::string buf_zero(kPageSize * 2, 0x00);
  std::string buf_ones(kPageSize * 2, 0xFF);

  // Verify the two pages are actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Fill out the pages with all ones.
  memcpy(addr_, buf_ones.data(), kPageSize * 2);

  // Verify that the memcpy actually stuck in the pages.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));
}

TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) {
  // This test will verify that two independent mappings of /dev/zero do not
  // appear to reference the same "backed file" even when mapped from the
  // same initial fd.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());

  // Create a second mapping via the same fd.
  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                        dev_zero.get(), 0);
  ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds());

  // Always unmap.
  auto cleanup_psec_map = Cleanup(
      [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });

  // Verify that we have independently addressed pages.
  ASSERT_NE(psec_map, addr_);

  std::string buf_zero(kPageSize, 0x00);
  std::string buf_ones(kPageSize, 0xFF);

  // Verify the first is actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Let's fill in the first mapping with 0xFF.
  memcpy(addr_, buf_ones.data(), kPageSize);

  // Verify that the memcpy actually stuck in the page.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));

  // Verify that it didn't affect the second page which should be all zeros.
  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
}

TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
  SetupGvisorDeathTest();

  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV.
  // This test is redundant but given the special nature of /dev/zero mappings
  // it doesn't hurt.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  const auto rest = [&] {
    // Test that we can create a RW SHARED mapping of /dev/zero.
    TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                    dev_zero.get(),
                    0) != reinterpret_cast<uintptr_t>(MAP_FAILED));

    // Confirm that accesses after the unmap result in a SIGSEGV.
    //
    // N.B. We depend on this process being single-threaded to ensure there
    // can't be another mmap to map addr before the dereference below.
    void* addr_saved = addr_;  // Unmap resets addr_.
    TEST_PCHECK(Unmap() == 0);
    *reinterpret_cast<volatile int*>(addr_saved) = 0xFF;
  };

  int child_exit_status = ASSERT_NO_ERRNO_AND_VALUE(InForkedProcess(rest));
  EXPECT_TRUE(WIFSIGNALED(child_exit_status) &&
              WTERMSIG(child_exit_status) == SIGSEGV)
      << "exit status: " << child_exit_status;
}

TEST_F(MMapTest, MapDevZeroUnaligned) {
  // Mapping length that is not a multiple of the page size; the kernel rounds
  // the mapping up to a full page, all of which must read as zeros.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
  const size_t size = kPageSize + kPageSize / 2;
  const std::string buf_zero(size, 0x00);

  ASSERT_THAT(
      Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  ASSERT_THAT(
      Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
      SyscallSucceeds());
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
}

// We can't map _some_ character devices.
TEST_F(MMapTest, MapCharDevice) {
  const FileDescriptor cdevfd =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0));
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0),
              SyscallFailsWithErrno(ENODEV));
}

// We can't map directories.
334 TEST_F(MMapTest, MapDirectory) { 335 const FileDescriptor dirfd = 336 ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0)); 337 EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0), 338 SyscallFailsWithErrno(ENODEV)); 339 } 340 341 // We can map *something* 342 TEST_F(MMapTest, MapAnything) { 343 EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 344 SyscallSucceedsWithValue(Gt(0))); 345 } 346 347 // Map length < PageSize allowed 348 TEST_F(MMapTest, SmallMap) { 349 EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 350 SyscallSucceeds()); 351 } 352 353 // Hint address doesn't break anything. 354 // Note: there is no requirement we actually get the hint address 355 TEST_F(MMapTest, HintAddress) { 356 EXPECT_THAT( 357 Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 358 SyscallSucceeds()); 359 } 360 361 // MAP_FIXED gives us exactly the requested address 362 TEST_F(MMapTest, MapFixed) { 363 EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE, 364 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), 365 SyscallSucceedsWithValue(0x30000000)); 366 } 367 368 // 64-bit addresses work too 369 #if defined(__x86_64__) || defined(__aarch64__) 370 TEST_F(MMapTest, MapFixed64) { 371 EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, 372 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), 373 SyscallSucceedsWithValue(0x300000000000)); 374 } 375 #endif 376 377 // MAP_STACK allowed. 378 // There isn't a good way to verify it did anything. 379 TEST_F(MMapTest, MapStack) { 380 EXPECT_THAT(Map(0, kPageSize, PROT_NONE, 381 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0), 382 SyscallSucceeds()); 383 } 384 385 // MAP_LOCKED allowed. 386 // There isn't a good way to verify it did anything. 
TEST_F(MMapTest, MapLocked) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0),
              SyscallSucceeds());
}

// MAP_PRIVATE or MAP_SHARED must be passed
TEST_F(MMapTest, NotPrivateOrShared) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Only one of MAP_PRIVATE or MAP_SHARED may be passed
TEST_F(MMapTest, PrivateAndShared) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

TEST_F(MMapTest, FixedAlignment) {
  // Addr must be page aligned (MAP_FIXED)
  EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Non-MAP_FIXED address does not need to be page aligned
TEST_F(MMapTest, NonFixedAlignment) {
  EXPECT_THAT(
      Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());
}

// Length = 0 results in EINVAL.
TEST_F(MMapTest, InvalidLength) {
  EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Bad fd not allowed.
TEST_F(MMapTest, BadFd) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0),
              SyscallFailsWithErrno(EBADF));
}

// Mappings are writable.
TEST_F(MMapTest, ProtWrite) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// "Write-only" mappings are writable *and* readable.
TEST_F(MMapTest, ProtWriteOnly) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// "Write-only" mappings are readable.
//
// This is distinct from above to ensure the page is accessible even if the
// initial fault is a write fault.
TEST_F(MMapTest, ProtWriteOnlyReadable) {
  uint64_t addr;
  constexpr uint64_t kFirstWord = 0;

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord,
                      sizeof(kFirstWord)));
}

// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE.
TEST_F(MMapTest, ProtectProtWrite) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
              SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// SIGSEGV raised when reading PROT_NONE memory
TEST_F(MMapTest, ProtNoneDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr),
              ::testing::KilledBySignal(SIGSEGV), "");
}

// SIGSEGV raised when writing PROT_READ only memory
TEST_F(MMapTest, ReadOnlyDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42,
              ::testing::KilledBySignal(SIGSEGV), "");
}

// Writable mapping mprotect'd to read-only should not be writable.
TEST_F(MMapTest, MprotectReadOnlyDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  volatile int* val = reinterpret_cast<int*>(addr);

  // Copy to ensure page is mapped in.
  *val = 42;

  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds());

  // Now it shouldn't be writable.
  EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), "");
}

// Verify that calling mprotect an address that's not page aligned fails.
TEST_F(MMapTest, MprotectNotPageAligned) {
  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
  ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ),
              SyscallFailsWithErrno(EINVAL));
}

// Verify that calling mprotect with an absurdly huge length fails.
TEST_F(MMapTest, MprotectHugeLength) {
  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
  // SIZE_MAX rounds past the end of the address space -> ENOMEM.
  ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ),
              SyscallFailsWithErrno(ENOMEM));
}

#if defined(__x86_64__) || defined(__i386__)
// This code is equivalent in 32 and 64-bit mode
const uint8_t machine_code[] = {
    0xb8, 0x2a, 0x00, 0x00, 0x00,  // movl $42, %eax
    0xc3,                          // retq
};
#elif defined(__aarch64__)
const uint8_t machine_code[] = {
    0x40, 0x05, 0x80, 0x52,  // mov w0, #42
    0xc0, 0x03, 0x5f, 0xd6,  // ret
};
#elif defined(__riscv)
const uint8_t machine_code[] = {
    0x13, 0x05, 0xa0, 0x02,  // li a0,42
    0x82, 0x80               // ret
};
#endif

// PROT_EXEC allows code execution
TEST_F(MMapTest, ProtExec) {
  uintptr_t addr;
  uint32_t (*func)(void);

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));

#if defined(__aarch64__)
  // We use this as a memory barrier for Arm64.
  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_EXEC),
              SyscallSucceeds());
#endif

  func = reinterpret_cast<uint32_t (*)(void)>(addr);

  EXPECT_EQ(42, func());
}

// No PROT_EXEC disallows code execution
TEST_F(MMapTest, NoProtExecDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;
  uint32_t (*func)(void);

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));

  func = reinterpret_cast<uint32_t (*)(void)>(addr);

  EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), "");
}

// brk() succeeds when RLIMIT_DATA is unlimited.
TEST_F(MMapTest, NoExceedLimitData) {
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 1;

  setlim.rlim_cur = RLIM_INFINITY;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0));
}

TEST_F(MMapTest, ExceedLimitData) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

// Same as ExceedLimitData, but set the limit via prlimit(0, ...) (self).
TEST_F(MMapTest, ExceedLimitDataPrlimit) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

// Same again, but set the limit via prlimit() with an explicit (own) tid.
TEST_F(MMapTest, ExceedLimitDataPrlimitPID) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr),
              SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

TEST_F(MMapTest, NoExceedLimitAS) {
  constexpr uint64_t kAllocBytes = 200 << 20;
  // Add some headroom to the AS limit in case of e.g. unexpected stack
  // expansion.
  constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20);
  static_assert(kAllocBytes < kExtraASBytes,
                "test depends on allocation not exceeding AS limit");

  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
  struct rlimit setlim;
  setlim.rlim_cur = vss + kExtraASBytes;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
  EXPECT_THAT(
      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceedsWithValue(Gt(0)));
}

TEST_F(MMapTest, ExceedLimitAS) {
  constexpr uint64_t kAllocBytes = 200 << 20;
  // Add some headroom to the AS limit in case of e.g. unexpected stack
  // expansion.
  constexpr uint64_t kExtraASBytes = 20 << 20;
  static_assert(kAllocBytes > kExtraASBytes,
                "test depends on allocation exceeding AS limit");

  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
  struct rlimit setlim;
  setlim.rlim_cur = vss + kExtraASBytes;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
  EXPECT_THAT(
      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallFailsWithErrno(ENOMEM));
}

// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory.
750 TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) { 751 uintptr_t addr; 752 constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; 753 754 EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, 755 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 756 SyscallSucceedsWithValue(Gt(0))); 757 758 memset(reinterpret_cast<void*>(addr), 42, kPageSize); 759 760 ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds()); 761 ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), 762 SyscallSucceeds()); 763 764 // The written data should still be there. 765 EXPECT_EQ( 766 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); 767 } 768 769 constexpr char kFileContents[] = "Hello World!"; 770 771 class MMapFileTest : public MMapTest { 772 protected: 773 FileDescriptor fd_; 774 std::string filename_; 775 776 // Open a file for read/write 777 void SetUp() override { 778 MMapTest::SetUp(); 779 780 filename_ = NewTempAbsPath(); 781 fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644)); 782 783 // Extend file so it can be written once mapped. Deliberately make the file 784 // only half a page in size, so we can test what happens when we access the 785 // second half. 786 // Use ftruncate(2) once the sentry supports it. 787 char zero = 0; 788 size_t count = 0; 789 do { 790 const DisableSave ds; // saving 2048 times is slow and useless. 791 Write(&zero, 1), SyscallSucceedsWithValue(1); 792 } while (++count < (kPageSize / 2)); 793 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 794 } 795 796 // Close and delete file 797 void TearDown() override { 798 MMapTest::TearDown(); 799 fd_.reset(); // Make sure the files is closed before we unlink it. 
800 ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds()); 801 } 802 803 bool FSSupportsMap() const { 804 bool supported = true; 805 void* ret = mmap(nullptr, 1, PROT_NONE, MAP_PRIVATE, fd_.get(), 0); 806 if (ret == MAP_FAILED && errno != ENODEV) { 807 supported = false; 808 } 809 if (ret != MAP_FAILED) { 810 munmap(ret, 1); 811 } 812 813 return supported; 814 } 815 816 ssize_t Read(char* buf, size_t count) { 817 ssize_t len = 0; 818 do { 819 ssize_t ret = read(fd_.get(), buf, count); 820 if (ret < 0) { 821 return ret; 822 } else if (ret == 0) { 823 return len; 824 } 825 826 len += ret; 827 buf += ret; 828 } while (len < static_cast<ssize_t>(count)); 829 830 return len; 831 } 832 833 ssize_t Write(const char* buf, size_t count) { 834 ssize_t len = 0; 835 do { 836 ssize_t ret = write(fd_.get(), buf, count); 837 if (ret < 0) { 838 return ret; 839 } else if (ret == 0) { 840 return len; 841 } 842 843 len += ret; 844 buf += ret; 845 } while (len < static_cast<ssize_t>(count)); 846 847 return len; 848 } 849 }; 850 851 class MMapFileParamTest 852 : public MMapFileTest, 853 public ::testing::WithParamInterface<std::tuple<int, int>> { 854 protected: 855 int prot() const { return std::get<0>(GetParam()); } 856 857 int flags() const { return std::get<1>(GetParam()); } 858 }; 859 860 // MAP_POPULATE allowed. 861 // There isn't a good way to verify it actually did anything. 862 TEST_P(MMapFileParamTest, MapPopulate) { 863 SKIP_IF(!FSSupportsMap()); 864 ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), 865 SyscallSucceeds()); 866 } 867 868 // MAP_POPULATE on a short file. 869 TEST_P(MMapFileParamTest, MapPopulateShort) { 870 SKIP_IF(!FSSupportsMap()); 871 ASSERT_THAT( 872 Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), 873 SyscallSucceeds()); 874 } 875 876 // Read contents from mapped file. 
TEST_F(MMapFileTest, Read) {
  SKIP_IF(!FSSupportsMap());
  size_t len = strlen(kFileContents);
  ASSERT_EQ(len, Write(kFileContents, len));

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0),
              SyscallSucceeds());

  EXPECT_THAT(reinterpret_cast<char*>(addr),
              EqualsMemory(std::string(kFileContents)));
}

// Map at an offset.
TEST_F(MMapFileTest, MapOffset) {
  SKIP_IF(!FSSupportsMap());
  ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds());

  size_t len = strlen(kFileContents);
  ASSERT_EQ(len, Write(kFileContents, len));

  uintptr_t addr;
  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize),
      SyscallSucceeds());

  EXPECT_THAT(reinterpret_cast<char*>(addr),
              EqualsMemory(std::string(kFileContents)));
}

// Mapping beyond the end of the file succeeds, but touching the pages
// raises SIGBUS.
TEST_F(MMapFileTest, MapOffsetBeyondEnd) {
  SKIP_IF(!FSSupportsMap());
  SetupGvisorDeathTest();

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd_.get(), 10 * kPageSize),
              SyscallSucceeds());

  // Touching the memory causes SIGBUS.
  size_t len = strlen(kFileContents);
  EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
                        reinterpret_cast<volatile char*>(addr)),
              ::testing::KilledBySignal(SIGBUS), "");
}

TEST_F(MMapFileTest, MapSecondToLastPositivePage) {
  SKIP_IF(!FSSupportsMap());
  EXPECT_THAT(
      Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(),
          (std::numeric_limits<off_t>::max() - kPageSize) & ~(kPageSize - 1)),
      SyscallSucceeds());
}

TEST_F(MMapFileTest, MapLastPositivePage) {
  SKIP_IF(!FSSupportsMap());
  // For regular files, this should fail due to integer overflow of the end
  // offset.
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(),
                  std::numeric_limits<off_t>::max() & ~(kPageSize - 1)),
              SyscallFailsWithErrno(EOVERFLOW));
}

TEST_F(MMapFileTest, MapFirstNegativePage) {
  SKIP_IF(!FSSupportsMap());
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(),
                  std::numeric_limits<off_t>::min()),
              SyscallFailsWithErrno(EOVERFLOW));
}

TEST_F(MMapFileTest, MapSecondToLastNegativePage) {
  SKIP_IF(!FSSupportsMap());
  EXPECT_THAT(
      Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), -(2 * kPageSize)),
      SyscallFailsWithErrno(EOVERFLOW));
}

TEST_F(MMapFileTest, MapLastNegativePage) {
  SKIP_IF(!FSSupportsMap());
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), -kPageSize),
              SyscallFailsWithErrno(EOVERFLOW));
}

// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs.
TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) {
  SKIP_IF(!FSSupportsMap());
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));

  uintptr_t addr;
  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd.get(), 0),
              SyscallSucceeds());

  // Touch the page to ensure the kernel didn't lie about writability.
  size_t len = strlen(kFileContents);
  std::copy(kFileContents, kFileContents + len,
            reinterpret_cast<volatile char*>(addr));
}

// MAP_SHARED PROT_WRITE not allowed on read-only FDs.
TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) {
  SKIP_IF(!FSSupportsMap());
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));

  uintptr_t addr;
  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0),
      SyscallFailsWithErrno(EACCES));
}

// Mmap not allowed on O_PATH FDs.
990 TEST_F(MMapFileTest, MmapFileWithOpath) { 991 SKIP_IF(!FSSupportsMap()); 992 const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 993 const FileDescriptor fd = 994 ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_PATH)); 995 996 uintptr_t addr; 997 EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0), 998 SyscallFailsWithErrno(EBADF)); 999 } 1000 1001 // The FD must be readable. 1002 TEST_P(MMapFileParamTest, WriteOnlyFd) { 1003 SKIP_IF(!FSSupportsMap()); 1004 const FileDescriptor fd = 1005 ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); 1006 1007 uintptr_t addr; 1008 EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0), 1009 SyscallFailsWithErrno(EACCES)); 1010 } 1011 1012 // Overwriting the contents of a file mapped MAP_SHARED PROT_READ 1013 // should cause the new data to be reflected in the mapping. 1014 TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { 1015 SKIP_IF(!FSSupportsMap()); 1016 // Start from scratch. 1017 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1018 1019 // Expand the file to two pages and dirty them. 1020 std::string bufA(kPageSize, 'a'); 1021 ASSERT_THAT(Write(bufA.c_str(), bufA.size()), 1022 SyscallSucceedsWithValue(bufA.size())); 1023 std::string bufB(kPageSize, 'b'); 1024 ASSERT_THAT(Write(bufB.c_str(), bufB.size()), 1025 SyscallSucceedsWithValue(bufB.size())); 1026 1027 // Map the page. 1028 uintptr_t addr; 1029 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1030 SyscallSucceeds()); 1031 1032 // Check that the mapping contains the right file data. 1033 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); 1034 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), 1035 kPageSize)); 1036 1037 // Start at the beginning of the file. 1038 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 1039 1040 // Swap the write pattern. 
1041 ASSERT_THAT(Write(bufB.c_str(), bufB.size()), 1042 SyscallSucceedsWithValue(bufB.size())); 1043 ASSERT_THAT(Write(bufA.c_str(), bufA.size()), 1044 SyscallSucceedsWithValue(bufA.size())); 1045 1046 // Check that the mapping got updated. 1047 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize)); 1048 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(), 1049 kPageSize)); 1050 } 1051 1052 // Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected 1053 // in the mapping. 1054 TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { 1055 SKIP_IF(!FSSupportsMap()); 1056 // Start from scratch. 1057 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1058 1059 // Expand the file to two pages and dirty them. 1060 std::string bufA(kPageSize, 'a'); 1061 ASSERT_THAT(Write(bufA.c_str(), bufA.size()), 1062 SyscallSucceedsWithValue(bufA.size())); 1063 std::string bufB(kPageSize, 'b'); 1064 ASSERT_THAT(Write(bufB.c_str(), bufB.size()), 1065 SyscallSucceedsWithValue(bufB.size())); 1066 1067 // Map the page. 1068 uintptr_t addr; 1069 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1070 SyscallSucceeds()); 1071 1072 // Check that the mapping contains the right file data. 1073 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); 1074 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), 1075 kPageSize)); 1076 1077 // Start at the beginning of the file. 1078 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 1079 1080 // Do a partial overwrite, spanning both pages. 1081 std::string bufC(kPageSize + (kPageSize / 2), 'c'); 1082 ASSERT_THAT(Write(bufC.c_str(), bufC.size()), 1083 SyscallSucceedsWithValue(bufC.size())); 1084 1085 // Check that the mapping got updated. 
1086 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(), 1087 kPageSize + (kPageSize / 2))); 1088 EXPECT_EQ(0, 1089 memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)), 1090 bufB.c_str(), kPageSize / 2)); 1091 } 1092 1093 // Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the 1094 // mapping and the file. 1095 TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { 1096 SKIP_IF(!FSSupportsMap()); 1097 // Start from scratch. 1098 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1099 1100 // Expand the file to two full pages and dirty it. 1101 std::string bufA(2 * kPageSize, 'a'); 1102 ASSERT_THAT(Write(bufA.c_str(), bufA.size()), 1103 SyscallSucceedsWithValue(bufA.size())); 1104 1105 // Map only the first page. 1106 uintptr_t addr; 1107 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1108 SyscallSucceeds()); 1109 1110 // Prepare to overwrite the file contents. 1111 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 1112 1113 // Overwrite everything, beyond the mapped portion. 1114 std::string bufB(2 * kPageSize, 'b'); 1115 ASSERT_THAT(Write(bufB.c_str(), bufB.size()), 1116 SyscallSucceedsWithValue(bufB.size())); 1117 1118 // What the mapped portion should now look like. 1119 std::string bufMapped(kPageSize, 'b'); 1120 1121 // Expect that the mapped portion is consistent. 1122 EXPECT_EQ( 1123 0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize)); 1124 1125 // Prepare to read the entire file contents. 1126 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 1127 1128 // Expect that the file was fully updated. 1129 std::vector<char> bufFile(2 * kPageSize); 1130 ASSERT_THAT(Read(bufFile.data(), bufFile.size()), 1131 SyscallSucceedsWithValue(bufFile.size())); 1132 // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a 1133 // NUL-terminated C std::string. 
EXPECT_THAT will try to print a char* as a C 1134 // std::string, possibly overruning the buffer. 1135 EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB)); 1136 } 1137 1138 // Write data to mapped file. 1139 TEST_F(MMapFileTest, WriteShared) { 1140 SKIP_IF(!FSSupportsMap()); 1141 uintptr_t addr; 1142 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1143 fd_.get(), 0), 1144 SyscallSucceeds()); 1145 1146 size_t len = strlen(kFileContents); 1147 memcpy(reinterpret_cast<void*>(addr), kFileContents, len); 1148 1149 // The file may not actually be updated until munmap is called. 1150 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1151 1152 std::vector<char> buf(len); 1153 ASSERT_THAT(Read(buf.data(), buf.size()), 1154 SyscallSucceedsWithValue(buf.size())); 1155 // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a 1156 // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C 1157 // string, possibly overruning the buffer. 1158 EXPECT_THAT(reinterpret_cast<void*>(buf.data()), 1159 EqualsMemory(std::string(kFileContents))); 1160 } 1161 1162 // Write data to portion of mapped page beyond the end of the file. 1163 // These writes are not reflected in the file. 1164 TEST_F(MMapFileTest, WriteSharedBeyondEnd) { 1165 SKIP_IF(!FSSupportsMap()); 1166 // The file is only half of a page. We map an entire page. Writes to the 1167 // end of the mapping must not be reflected in the file. 1168 uintptr_t addr; 1169 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1170 fd_.get(), 0), 1171 SyscallSucceeds()); 1172 1173 // First half; this is reflected in the file. 1174 std::string first(kPageSize / 2, 'A'); 1175 memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); 1176 1177 // Second half; this is not reflected in the file. 
1178 std::string second(kPageSize / 2, 'B'); 1179 memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), 1180 second.size()); 1181 1182 // The file may not actually be updated until munmap is called. 1183 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1184 1185 // Big enough to fit the entire page, if the writes are mistakenly written to 1186 // the file. 1187 std::vector<char> buf(kPageSize); 1188 1189 // Only the first half is in the file. 1190 ASSERT_THAT(Read(buf.data(), buf.size()), 1191 SyscallSucceedsWithValue(first.size())); 1192 // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a 1193 // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C 1194 // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C 1195 // std::string, possibly overruning the buffer. 1196 EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); 1197 } 1198 1199 // The portion of a mapped page that becomes part of the file after a truncate 1200 // is reflected in the file. 1201 TEST_F(MMapFileTest, WriteSharedTruncateUp) { 1202 SKIP_IF(!FSSupportsMap()); 1203 // The file is only half of a page. We map an entire page. Writes to the 1204 // end of the mapping must not be reflected in the file. 1205 uintptr_t addr; 1206 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1207 fd_.get(), 0), 1208 SyscallSucceeds()); 1209 1210 // First half; this is reflected in the file. 1211 std::string first(kPageSize / 2, 'A'); 1212 memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); 1213 1214 // Second half; this is not reflected in the file now (see 1215 // WriteSharedBeyondEnd), but will be after the truncate. 1216 std::string second(kPageSize / 2, 'B'); 1217 memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), 1218 second.size()); 1219 1220 // Extend the file to a full page. The second half of the page will be 1221 // reflected in the file. 
1222 EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); 1223 1224 // The file may not actually be updated until munmap is called. 1225 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1226 1227 // The whole page is in the file. 1228 std::vector<char> buf(kPageSize); 1229 ASSERT_THAT(Read(buf.data(), buf.size()), 1230 SyscallSucceedsWithValue(buf.size())); 1231 // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a 1232 // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C 1233 // string, possibly overruning the buffer. 1234 EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); 1235 EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2), 1236 EqualsMemory(second)); 1237 } 1238 1239 TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { 1240 SKIP_IF(!FSSupportsMap()); 1241 // Start from scratch. 1242 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1243 1244 // Expand the file to a full page and dirty it. 1245 std::string buf(kPageSize, 'a'); 1246 ASSERT_THAT(Write(buf.c_str(), buf.size()), 1247 SyscallSucceedsWithValue(buf.size())); 1248 1249 // Map the page. 1250 uintptr_t addr; 1251 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1252 SyscallSucceeds()); 1253 1254 // Check that the memory contains the file data. 1255 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); 1256 1257 // Truncate down, then up. 1258 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1259 EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); 1260 1261 // Check that the memory was zeroed. 1262 std::string zeroed(kPageSize, '\0'); 1263 EXPECT_EQ(0, 1264 memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); 1265 1266 // The file may not actually be updated until msync is called. 1267 ASSERT_THAT(Msync(), SyscallSucceeds()); 1268 1269 // Prepare to read the entire file contents. 
1270 ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); 1271 1272 // Expect that the file is fully updated. 1273 std::vector<char> bufFile(kPageSize); 1274 ASSERT_THAT(Read(bufFile.data(), bufFile.size()), 1275 SyscallSucceedsWithValue(bufFile.size())); 1276 EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize)); 1277 } 1278 1279 TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { 1280 SKIP_IF(!FSSupportsMap()); 1281 // The file is only half of a page. We map an entire page. Writes to the 1282 // end of the mapping must not be reflected in the file. 1283 uintptr_t addr; 1284 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1285 fd_.get(), 0), 1286 SyscallSucceeds()); 1287 1288 // First half; this will be deleted by truncate(0). 1289 std::string first(kPageSize / 2, 'A'); 1290 memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); 1291 1292 // Truncate down, then up. 1293 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1294 EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); 1295 1296 // The whole page is zeroed in memory. 1297 std::string zeroed(kPageSize, '\0'); 1298 EXPECT_EQ(0, 1299 memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); 1300 1301 // The file may not actually be updated until munmap is called. 1302 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1303 1304 // The whole file is also zeroed. 1305 std::vector<char> buf(kPageSize); 1306 ASSERT_THAT(Read(buf.data(), buf.size()), 1307 SyscallSucceedsWithValue(buf.size())); 1308 // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a 1309 // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C 1310 // string, possibly overruning the buffer. 1311 EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed)); 1312 } 1313 1314 TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { 1315 SKIP_IF(!FSSupportsMap()); 1316 SetupGvisorDeathTest(); 1317 1318 // Start from scratch. 
1319 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1320 1321 // Expand the file to a full page and dirty it. 1322 std::string buf(kPageSize, 'a'); 1323 ASSERT_THAT(Write(buf.c_str(), buf.size()), 1324 SyscallSucceedsWithValue(buf.size())); 1325 1326 // Map the page. 1327 uintptr_t addr; 1328 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1329 SyscallSucceeds()); 1330 1331 // Check that the mapping contains the file data. 1332 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); 1333 1334 // Truncate down. 1335 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1336 1337 // Accessing the truncated region should cause a SIGBUS. 1338 std::vector<char> in(kPageSize); 1339 EXPECT_EXIT( 1340 std::copy(reinterpret_cast<volatile char*>(addr), 1341 reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()), 1342 ::testing::KilledBySignal(SIGBUS), ""); 1343 } 1344 1345 TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { 1346 SKIP_IF(!FSSupportsMap()); 1347 SetupGvisorDeathTest(); 1348 1349 uintptr_t addr; 1350 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1351 fd_.get(), 0), 1352 SyscallSucceeds()); 1353 1354 // Touch the memory to be sure it really is mapped. 1355 size_t len = strlen(kFileContents); 1356 memcpy(reinterpret_cast<void*>(addr), kFileContents, len); 1357 1358 // Truncate down. 1359 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1360 1361 // Accessing the truncated file should cause a SIGBUS. 1362 EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, 1363 reinterpret_cast<volatile char*>(addr)), 1364 ::testing::KilledBySignal(SIGBUS), ""); 1365 } 1366 1367 TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { 1368 SKIP_IF(!FSSupportsMap()); 1369 // Start from scratch. 1370 EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); 1371 1372 // Dirty the file. 
1373 std::string buf(kPageSize, 'a'); 1374 ASSERT_THAT(Write(buf.c_str(), buf.size()), 1375 SyscallSucceedsWithValue(buf.size())); 1376 1377 // Map a page. 1378 uintptr_t addr; 1379 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1380 SyscallSucceeds()); 1381 1382 // Truncate to half of the page. 1383 EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); 1384 1385 // First half of the page untouched. 1386 EXPECT_EQ(0, 1387 memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2)); 1388 1389 // Second half is zeroed. 1390 std::string zeroed(kPageSize / 2, '\0'); 1391 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), 1392 zeroed.c_str(), kPageSize / 2)); 1393 } 1394 1395 // Page can still be accessed and contents are intact after truncating a partial 1396 // page. 1397 TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { 1398 SKIP_IF(!FSSupportsMap()); 1399 // Expand the file to a full page. 1400 EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); 1401 1402 uintptr_t addr; 1403 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1404 fd_.get(), 0), 1405 SyscallSucceeds()); 1406 1407 // Fill the entire page. 1408 std::string contents(kPageSize, 'A'); 1409 memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size()); 1410 1411 // Truncate half of the page. 1412 EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); 1413 1414 // First half of the page untouched. 1415 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(), 1416 kPageSize / 2)); 1417 1418 // Second half zeroed. 1419 std::string zeroed(kPageSize / 2, '\0'); 1420 EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), 1421 zeroed.c_str(), kPageSize / 2)); 1422 } 1423 1424 // MAP_PRIVATE writes are not carried through to the underlying file. 
1425 TEST_F(MMapFileTest, WritePrivate) { 1426 SKIP_IF(!FSSupportsMap()); 1427 uintptr_t addr; 1428 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, 1429 fd_.get(), 0), 1430 SyscallSucceeds()); 1431 1432 size_t len = strlen(kFileContents); 1433 memcpy(reinterpret_cast<void*>(addr), kFileContents, len); 1434 1435 // The file should not be updated, but if it mistakenly is, it may not be 1436 // until after munmap is called. 1437 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1438 1439 std::vector<char> buf(len); 1440 ASSERT_THAT(Read(buf.data(), buf.size()), 1441 SyscallSucceedsWithValue(buf.size())); 1442 // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a 1443 // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C 1444 // string, possibly overruning the buffer. 1445 EXPECT_THAT(reinterpret_cast<void*>(buf.data()), 1446 EqualsMemory(std::string(len, '\0'))); 1447 } 1448 1449 // SIGBUS raised when reading or writing past end of a mapped file. 1450 TEST_P(MMapFileParamTest, SigBusDeath) { 1451 SKIP_IF(!FSSupportsMap()); 1452 SetupGvisorDeathTest(); 1453 1454 uintptr_t addr; 1455 ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), 1456 SyscallSucceeds()); 1457 1458 auto* start = reinterpret_cast<volatile char*>(addr + kPageSize); 1459 1460 // MMapFileTest makes a file kPageSize/2 long. The entire first page should be 1461 // accessible, but anything beyond it should not. 1462 if (prot() & PROT_WRITE) { 1463 // Write beyond first page. 1464 size_t len = strlen(kFileContents); 1465 EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start), 1466 ::testing::KilledBySignal(SIGBUS), ""); 1467 } else { 1468 // Read beyond first page. 
1469 std::vector<char> in(kPageSize); 1470 EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()), 1471 ::testing::KilledBySignal(SIGBUS), ""); 1472 } 1473 } 1474 1475 // Tests that SIGBUS is not raised when reading or writing to a file-mapped 1476 // page before EOF, even if part of the mapping extends beyond EOF. 1477 // 1478 // See b/27877699. 1479 TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { 1480 SKIP_IF(!FSSupportsMap()); 1481 uintptr_t addr; 1482 ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), 1483 SyscallSucceeds()); 1484 1485 // The test passes if this survives. 1486 auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); 1487 size_t len = strlen(kFileContents); 1488 if (prot() & PROT_WRITE) { 1489 std::copy(kFileContents, kFileContents + len, start); 1490 } else { 1491 std::vector<char> in(len); 1492 std::copy(start, start + len, in.data()); 1493 } 1494 } 1495 1496 // Tests that SIGBUS is not raised when reading or writing from a file-mapped 1497 // page containing EOF, *after* the EOF. 1498 TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { 1499 SKIP_IF(!FSSupportsMap()); 1500 uintptr_t addr; 1501 ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), 1502 SyscallSucceeds()); 1503 1504 // The test passes if this survives. (Technically addr+kPageSize/2 is already 1505 // beyond EOF, but +1 to check for fencepost errors.) 1506 auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); 1507 size_t len = strlen(kFileContents); 1508 if (prot() & PROT_WRITE) { 1509 std::copy(kFileContents, kFileContents + len, start); 1510 } else { 1511 std::vector<char> in(len); 1512 std::copy(start, start + len, in.data()); 1513 } 1514 } 1515 1516 // Tests that reading from writable shared file-mapped pages succeeds. 
1517 // 1518 // On most platforms this is trivial, but when the file is mapped via the sentry 1519 // page cache (which does not yet support writing to shared mappings), a bug 1520 // caused reads to fail unnecessarily on such mappings. See b/28913513. 1521 TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { 1522 SKIP_IF(!FSSupportsMap()); 1523 uintptr_t addr; 1524 size_t len = strlen(kFileContents); 1525 1526 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1527 fd_.get(), 0), 1528 SyscallSucceeds()); 1529 1530 std::vector<char> buf(kPageSize); 1531 // The test passes if this survives. 1532 std::copy(reinterpret_cast<volatile char*>(addr), 1533 reinterpret_cast<volatile char*>(addr) + len, buf.data()); 1534 } 1535 1536 // Tests that EFAULT is returned when invoking a syscall that requires the OS to 1537 // read past end of file (resulting in a fault in sentry context in the gVisor 1538 // case). See b/28913513. 1539 TEST_F(MMapFileTest, InternalSigBus) { 1540 SKIP_IF(!FSSupportsMap()); 1541 uintptr_t addr; 1542 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, 1543 fd_.get(), 0), 1544 SyscallSucceeds()); 1545 1546 // This depends on the fact that gVisor implements pipes internally. 1547 int pipefd[2]; 1548 ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); 1549 EXPECT_THAT( 1550 write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize), 1551 SyscallFailsWithErrno(EFAULT)); 1552 1553 EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); 1554 EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); 1555 } 1556 1557 // Like InternalSigBus, but test the WriteZerosAt path by reading from 1558 // /dev/zero to a shared mapping (so that the SIGBUS isn't caught during 1559 // copy-on-write breaking). 
1560 TEST_F(MMapFileTest, InternalSigBusZeroing) { 1561 SKIP_IF(!FSSupportsMap()); 1562 uintptr_t addr; 1563 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1564 fd_.get(), 0), 1565 SyscallSucceeds()); 1566 1567 const FileDescriptor dev_zero = 1568 ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); 1569 EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize), 1570 kPageSize), 1571 SyscallFailsWithErrno(EFAULT)); 1572 } 1573 1574 // Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not 1575 // induce a sentry panic (due to "rounding up" to 0). 1576 TEST_F(MMapTest, HugeLength) { 1577 EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE, 1578 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 1579 SyscallFailsWithErrno(ENOMEM)); 1580 } 1581 1582 // Tests for a specific gVisor MM caching bug. 1583 TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) { 1584 auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 1585 auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); 1586 auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); 1587 1588 // Get a two-page private mapping and fill it with 1s. 1589 uintptr_t addr; 1590 ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, 1591 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 1592 SyscallSucceeds()); 1593 memset(addr_, 1, 2 * kPageSize); 1594 MaybeSave(); 1595 1596 // Fork to make the mapping copy-on-write. 1597 pid_t const pid = fork(); 1598 if (pid == 0) { 1599 // The child process waits for the parent to SIGKILL it. 1600 while (true) { 1601 pause(); 1602 } 1603 } 1604 ASSERT_THAT(pid, SyscallSucceeds()); 1605 auto cleanup_child = Cleanup([&] { 1606 EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); 1607 int status; 1608 EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); 1609 }); 1610 1611 // Induce a read-only Access of the first page of the mapping, which will not 1612 // cause a copy. 
The usermem.Segment should be cached. 1613 ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), 1614 SyscallSucceedsWithValue(kPageSize)); 1615 1616 // Induce a writable Access of both pages of the mapping. This should 1617 // invalidate the cached Segment. 1618 ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0), 1619 SyscallSucceedsWithValue(2 * kPageSize)); 1620 1621 // Induce a read-only Access of the first page of the mapping again. It should 1622 // read the 0s that were stored in the mapping by the read from /dev/zero. If 1623 // the read failed to invalidate the cached Segment, it will instead read the 1624 // 1s in the stale page. 1625 ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), 1626 SyscallSucceedsWithValue(kPageSize)); 1627 std::vector<char> buf(kPageSize); 1628 ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0), 1629 SyscallSucceedsWithValue(kPageSize)); 1630 for (size_t i = 0; i < kPageSize; i++) { 1631 ASSERT_EQ(0, buf[i]) << "at offset " << i; 1632 } 1633 } 1634 1635 TEST_F(MMapTest, NoReserve) { 1636 const size_t kSize = 10 * 1 << 20; // 10M 1637 uintptr_t addr; 1638 ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE, 1639 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0), 1640 SyscallSucceeds()); 1641 EXPECT_GT(addr, 0); 1642 1643 // Check that every page can be read/written. Technically, writing to memory 1644 // could SIGSEGV in case there is no more memory available. In gVisor it 1645 // would never happen though because NORESERVE is ignored. In Linux, it's 1646 // possible to fail, but allocation is small enough that it's highly likely 1647 // to succeed. 1648 for (size_t j = 0; j < kSize; j += kPageSize) { 1649 EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]); 1650 reinterpret_cast<char*>(addr)[j] = j; 1651 } 1652 } 1653 1654 // Map more than the gVisor page-cache map unit (64k) and ensure that 1655 // it is consistent with reading from the file. 
1656 TEST_F(MMapFileTest, Bug38498194) { 1657 SKIP_IF(!FSSupportsMap()); 1658 // Choose a sufficiently large map unit. 1659 constexpr int kSize = 4 * 1024 * 1024; 1660 EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds()); 1661 1662 // Map a large enough region so that multiple internal segments 1663 // are created to back the mapping. 1664 uintptr_t addr; 1665 ASSERT_THAT( 1666 addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), 1667 SyscallSucceeds()); 1668 1669 std::vector<char> expect(kSize, 'a'); 1670 std::copy(expect.data(), expect.data() + expect.size(), 1671 reinterpret_cast<volatile char*>(addr)); 1672 1673 // Trigger writeback for gVisor. In Linux pages stay cached until 1674 // it can't hold onto them anymore. 1675 ASSERT_THAT(Unmap(), SyscallSucceeds()); 1676 1677 std::vector<char> buf(kSize); 1678 ASSERT_THAT(Read(buf.data(), buf.size()), 1679 SyscallSucceedsWithValue(buf.size())); 1680 EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size()); 1681 } 1682 1683 // Tests that reading from a file to a memory mapping of the same file does not 1684 // deadlock. See b/34813270. 1685 TEST_F(MMapFileTest, SelfRead) { 1686 SKIP_IF(!FSSupportsMap()); 1687 uintptr_t addr; 1688 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, 1689 fd_.get(), 0), 1690 SyscallSucceeds()); 1691 EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2), 1692 SyscallSucceedsWithValue(kPageSize / 2)); 1693 // The resulting file contents are poorly-specified and irrelevant. 1694 } 1695 1696 // Tests that writing to a file from a memory mapping of the same file does not 1697 // deadlock. Regression test for b/34813270. 
1698 TEST_F(MMapFileTest, SelfWrite) { 1699 SKIP_IF(!FSSupportsMap()); 1700 uintptr_t addr; 1701 ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), 1702 SyscallSucceeds()); 1703 EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2), 1704 SyscallSucceedsWithValue(kPageSize / 2)); 1705 // The resulting file contents are poorly-specified and irrelevant. 1706 } 1707 1708 TEST(MMapDeathTest, TruncateAfterCOWBreak) { 1709 SetupGvisorDeathTest(); 1710 1711 // Create and map a single-page file. 1712 auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 1713 auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR)); 1714 ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds()); 1715 1716 auto maybe_mapping = Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, 1717 MAP_PRIVATE, fd.get(), 0); 1718 // Does FS support mmap? 1719 SKIP_IF(maybe_mapping.error().errno_value() == ENODEV); 1720 auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(std::move(maybe_mapping)); 1721 1722 // Write to this mapping, causing the page to be copied for write. 1723 memset(mapping.ptr(), 'a', mapping.len()); 1724 MaybeSave(); // Trigger a co-operative save cycle. 1725 1726 // Truncate the file and expect it to invalidate the copied page. 1727 ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds()); 1728 EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()), 1729 ::testing::KilledBySignal(SIGBUS), ""); 1730 } 1731 1732 // Regression test for #147. 1733 TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { 1734 std::string filename = NewTempAbsPath(); 1735 1736 // We have to create the file O_RDONLY to reproduce the bug because 1737 // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing 1738 // the cached "write-only" FD to be read/write and therefore usable by mmap(). 
1739 auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE( 1740 Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666)); 1741 1742 // Get a write-only FD for the same file, which should be ignored by mmap() 1743 // (but isn't in #147). 1744 auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY)); 1745 ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds()); 1746 1747 auto maybe_mapping = 1748 Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0); 1749 // Does FS support mmap? 1750 SKIP_IF(maybe_mapping.error().errno_value() == ENODEV); 1751 auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(std::move(maybe_mapping)); 1752 1753 std::vector<char> buf(kPageSize); 1754 // The test passes if this survives. 1755 std::copy(static_cast<char*>(mapping.ptr()), 1756 static_cast<char*>(mapping.endptr()), buf.data()); 1757 } 1758 1759 // Conditional on MAP_32BIT. 1760 // This flag is supported only on x86-64, for 64-bit programs. 1761 #ifdef __x86_64__ 1762 1763 TEST(MMapNoFixtureTest, Map32Bit) { 1764 auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( 1765 MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT)); 1766 EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32); 1767 EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32); 1768 } 1769 1770 #endif // defined(__x86_64__) 1771 1772 INSTANTIATE_TEST_SUITE_P( 1773 ReadWriteSharedPrivate, MMapFileParamTest, 1774 ::testing::Combine(::testing::ValuesIn({ 1775 PROT_READ, 1776 PROT_WRITE, 1777 PROT_READ | PROT_WRITE, 1778 }), 1779 ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE}))); 1780 1781 } // namespace 1782 1783 } // namespace testing 1784 } // namespace gvisor