gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/proc.cc (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <elf.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <limits.h> 19 #include <linux/magic.h> 20 #include <sched.h> 21 #include <signal.h> 22 #include <stddef.h> 23 #include <stdint.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <sys/mman.h> 28 #include <sys/prctl.h> 29 #include <sys/ptrace.h> 30 #include <sys/stat.h> 31 #include <sys/statfs.h> 32 #include <sys/utsname.h> 33 #include <syscall.h> 34 #include <unistd.h> 35 36 #include <algorithm> 37 #include <atomic> 38 #include <functional> 39 #include <iostream> 40 #include <iterator> 41 #include <map> 42 #include <memory> 43 #include <ostream> 44 #include <regex> 45 #include <string> 46 #include <unordered_set> 47 #include <utility> 48 #include <vector> 49 50 #include "gmock/gmock.h" 51 #include "gtest/gtest.h" 52 #include "absl/algorithm/container.h" 53 #include "absl/container/btree_map.h" 54 #include "absl/container/flat_hash_set.h" 55 #include "absl/container/node_hash_set.h" 56 #include "absl/flags/flag.h" 57 #include "absl/strings/ascii.h" 58 #include "absl/strings/match.h" 59 #include "absl/strings/numbers.h" 60 #include "absl/strings/str_cat.h" 61 #include "absl/strings/str_format.h" 62 #include "absl/strings/str_join.h" 63 #include 
"absl/strings/str_split.h" 64 #include "absl/strings/string_view.h" 65 #include "absl/synchronization/mutex.h" 66 #include "absl/synchronization/notification.h" 67 #include "absl/time/clock.h" 68 #include "absl/time/time.h" 69 #include "test/util/capability_util.h" 70 #include "test/util/cleanup.h" 71 #include "test/util/eventfd_util.h" 72 #include "test/util/file_descriptor.h" 73 #include "test/util/fs_util.h" 74 #include "test/util/memory_util.h" 75 #include "test/util/mount_util.h" 76 #include "test/util/multiprocess_util.h" 77 #include "test/util/posix_error.h" 78 #include "test/util/proc_util.h" 79 #include "test/util/temp_path.h" 80 #include "test/util/test_util.h" 81 #include "test/util/thread_util.h" 82 #include "test/util/time_util.h" 83 #include "test/util/timer_util.h" 84 85 // NOTE(magi): No, this isn't really a syscall but this is a really simple 86 // way to get it tested on both gVisor, PTrace and Linux. 87 88 using ::testing::AllOf; 89 using ::testing::AnyOf; 90 using ::testing::ContainerEq; 91 using ::testing::Contains; 92 using ::testing::ContainsRegex; 93 using ::testing::Eq; 94 using ::testing::Gt; 95 using ::testing::HasSubstr; 96 using ::testing::IsSupersetOf; 97 using ::testing::Pair; 98 using ::testing::StartsWith; 99 using ::testing::UnorderedElementsAre; 100 using ::testing::UnorderedElementsAreArray; 101 102 // Exported by glibc. 
extern char** environ;

// Command-line flag which, per its help text, makes this binary run the
// Proc_PidReuse child workload when set to true.
ABSL_FLAG(bool, proc_pid_reuse_child, false,
          "If true, run the Proc_PidReuse child workload.");

namespace gvisor {
namespace testing {
namespace {

// Fallback values for the SUID_DUMP_* constants, defined only when no earlier
// header has already provided them.
#ifndef SUID_DUMP_DISABLE
#define SUID_DUMP_DISABLE 0
#endif /* SUID_DUMP_DISABLE */
#ifndef SUID_DUMP_USER
#define SUID_DUMP_USER 1
#endif /* SUID_DUMP_USER */
#ifndef SUID_DUMP_ROOT
#define SUID_DUMP_ROOT 2
#endif /* SUID_DUMP_ROOT */

#if defined(__x86_64__) || defined(__i386__)
// This list of "required" fields is taken from reading the file
// arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally
// printed by the kernel.
//
// NOTE(review): entries such as "model\t\t:" carry the tab/colon suffix,
// presumably so they do not also match longer keys like "model name" --
// confirm against the test that consumes this table.
static const char* required_fields[] = {
    "processor",
    "vendor_id",
    "cpu family",
    "model\t\t:",
    "model name",
    "stepping",
    "cpu MHz",
    "physical id",
    "siblings",
    "core id",
    "cpu cores",
    "apicid\t\t:",
    "initial apicid",
    "fpu\t\t:",
    "fpu_exception",
    "cpuid level",
    "wp",
    "bogomips",
    "clflush size",
    "cache_alignment",
    "address sizes",
    "power management",
};
#elif defined(__aarch64__)
// This list of "required" fields is taken from reading the file
// arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally
// printed by the kernel.
static const char* required_fields[] = {
    "processor",        "BogoMIPS",    "Features", "CPU implementer",
    "CPU architecture", "CPU variant", "CPU part", "CPU revision",
};
#elif defined(__riscv)
// This list of "required" fields is taken from reading the file
// arch/riscv/kernel/cpu.c and seeing which fields will be unconditionally
// printed by the kernel.
static const char* required_fields[] = {
    "processor",
    "hart",
};
#else
#error "Unknown architecture"
#endif

// Takes the subprocess pid.
171 // If it returns !OK, WithSubprocess returns immediately. 172 using SubprocessCallback = std::function<PosixError(int)>; 173 174 std::vector<std::string> saved_argv; // NOLINT 175 176 // Helper function to dump /proc/{pid}/status and check the 177 // state data. State should = "Z" for zombied or "RSD" for 178 // running, interruptible sleeping (S), or uninterruptible sleep 179 // (D). 180 void CompareProcessState(absl::string_view state, int pid) { 181 auto status_file = ASSERT_NO_ERRNO_AND_VALUE( 182 GetContents(absl::StrCat("/proc/", pid, "/status"))); 183 // N.B. POSIX extended regexes don't support shorthand character classes (\w) 184 // inside of brackets. 185 EXPECT_THAT(status_file, 186 ContainsRegex(absl::StrCat("State:.[", state, 187 R"EOL(]\s+\([a-zA-Z ]+\))EOL"))); 188 } 189 190 // Run callbacks while a subprocess is running, zombied, and/or exited. 191 PosixError WithSubprocess(SubprocessCallback const& running, 192 SubprocessCallback const& zombied, 193 SubprocessCallback const& exited) { 194 int pipe_fds[2] = {}; 195 if (pipe(pipe_fds) < 0) { 196 return PosixError(errno, "pipe"); 197 } 198 199 int child_pid = fork(); 200 if (child_pid < 0) { 201 return PosixError(errno, "fork"); 202 } 203 204 if (child_pid == 0) { 205 close(pipe_fds[0]); // Close the read end. 206 const DisableSave ds; // Timing issues. 207 208 // Write to the pipe to tell it we're ready. 209 char buf = 'a'; 210 int res = 0; 211 res = WriteFd(pipe_fds[1], &buf, sizeof(buf)); 212 TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess"); 213 214 while (true) { 215 SleepSafe(absl::Milliseconds(100)); 216 } 217 } 218 219 close(pipe_fds[1]); // Close the write end. 220 221 int status = 0; 222 auto wait_cleanup = Cleanup([child_pid, &status] { 223 EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); 224 }); 225 auto kill_cleanup = Cleanup([child_pid] { 226 EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); 227 }); 228 229 // Wait for the child. 
230 char buf = 0; 231 int res = ReadFd(pipe_fds[0], &buf, sizeof(buf)); 232 if (res < 0) { 233 return PosixError(errno, "Read from pipe"); 234 } else if (res == 0) { 235 return PosixError(EPIPE, "Unable to read from pipe: EOF"); 236 } 237 238 if (running) { 239 // The first arg, RSD, refers to a "running process", or a process with a 240 // state of Running (R), Interruptable Sleep (S) or Uninterruptable 241 // Sleep (D). 242 CompareProcessState("RSD", child_pid); 243 RETURN_IF_ERRNO(running(child_pid)); 244 } 245 246 // Kill the process. 247 kill_cleanup.Release()(); 248 siginfo_t info; 249 // Wait until the child process has exited (WEXITED flag) but don't 250 // reap the child (WNOWAIT flag). 251 EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED), 252 SyscallSucceeds()); 253 254 if (zombied) { 255 // Arg of "Z" refers to a Zombied Process. 256 CompareProcessState("Z", child_pid); 257 RETURN_IF_ERRNO(zombied(child_pid)); 258 } 259 260 // Wait on the process. 261 wait_cleanup.Release()(); 262 // If the process is reaped, then then this should return 263 // with ECHILD. 264 EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), 265 SyscallFailsWithErrno(ECHILD)); 266 267 if (exited) { 268 RETURN_IF_ERRNO(exited(child_pid)); 269 } 270 271 return NoError(); 272 } 273 274 // Access the file returned by name when a subprocess is running. 275 PosixError AccessWhileRunning(std::function<std::string(int pid)> name, 276 int flags, std::function<void(int fd)> access) { 277 FileDescriptor fd; 278 return WithSubprocess( 279 [&](int pid) -> PosixError { 280 // Running. 281 ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); 282 283 access(fd.get()); 284 return NoError(); 285 }, 286 nullptr, nullptr); 287 } 288 289 // Access the file returned by name when the a subprocess is zombied. 
290 PosixError AccessWhileZombied(std::function<std::string(int pid)> name, 291 int flags, std::function<void(int fd)> access) { 292 FileDescriptor fd; 293 return WithSubprocess( 294 [&](int pid) -> PosixError { 295 // Running. 296 ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); 297 return NoError(); 298 }, 299 [&](int pid) -> PosixError { 300 // Zombied. 301 access(fd.get()); 302 return NoError(); 303 }, 304 nullptr); 305 } 306 307 // Access the file returned by name when the a subprocess is exited. 308 PosixError AccessWhileExited(std::function<std::string(int pid)> name, 309 int flags, std::function<void(int fd)> access) { 310 FileDescriptor fd; 311 return WithSubprocess( 312 [&](int pid) -> PosixError { 313 // Running. 314 ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); 315 return NoError(); 316 }, 317 nullptr, 318 [&](int pid) -> PosixError { 319 // Exited. 320 access(fd.get()); 321 return NoError(); 322 }); 323 } 324 325 // ReadFd(fd=/proc/PID/basename) while PID is running. 326 int ReadWhileRunning(std::string const& basename, void* buf, size_t count) { 327 int ret = 0; 328 int err = 0; 329 EXPECT_NO_ERRNO(AccessWhileRunning( 330 [&](int pid) -> std::string { 331 return absl::StrCat("/proc/", pid, "/", basename); 332 }, 333 O_RDONLY, 334 [&](int fd) { 335 ret = ReadFd(fd, buf, count); 336 err = errno; 337 })); 338 errno = err; 339 return ret; 340 } 341 342 // ReadFd(fd=/proc/PID/basename) while PID is zombied. 343 int ReadWhileZombied(std::string const& basename, void* buf, size_t count) { 344 int ret = 0; 345 int err = 0; 346 EXPECT_NO_ERRNO(AccessWhileZombied( 347 [&](int pid) -> std::string { 348 return absl::StrCat("/proc/", pid, "/", basename); 349 }, 350 O_RDONLY, 351 [&](int fd) { 352 ret = ReadFd(fd, buf, count); 353 err = errno; 354 })); 355 errno = err; 356 return ret; 357 } 358 359 // ReadFd(fd=/proc/PID/basename) while PID is exited. 
360 int ReadWhileExited(std::string const& basename, void* buf, size_t count) { 361 int ret = 0; 362 int err = 0; 363 EXPECT_NO_ERRNO(AccessWhileExited( 364 [&](int pid) -> std::string { 365 return absl::StrCat("/proc/", pid, "/", basename); 366 }, 367 O_RDONLY, 368 [&](int fd) { 369 ret = ReadFd(fd, buf, count); 370 err = errno; 371 })); 372 errno = err; 373 return ret; 374 } 375 376 // readlinkat(fd=/proc/PID/, basename) while PID is running. 377 int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) { 378 int ret = 0; 379 int err = 0; 380 EXPECT_NO_ERRNO(AccessWhileRunning( 381 [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, 382 O_DIRECTORY, 383 [&](int fd) { 384 ret = readlinkat(fd, basename.c_str(), buf, count); 385 err = errno; 386 })); 387 errno = err; 388 return ret; 389 } 390 391 // readlinkat(fd=/proc/PID/, basename) while PID is zombied. 392 int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) { 393 int ret = 0; 394 int err = 0; 395 EXPECT_NO_ERRNO(AccessWhileZombied( 396 [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, 397 O_DIRECTORY, 398 [&](int fd) { 399 ret = readlinkat(fd, basename.c_str(), buf, count); 400 err = errno; 401 })); 402 errno = err; 403 return ret; 404 } 405 406 // readlinkat(fd=/proc/PID/, basename) while PID is exited. 
407 int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) { 408 int ret = 0; 409 int err = 0; 410 EXPECT_NO_ERRNO(AccessWhileExited( 411 [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, 412 O_DIRECTORY, 413 [&](int fd) { 414 ret = readlinkat(fd, basename.c_str(), buf, count); 415 err = errno; 416 })); 417 errno = err; 418 return ret; 419 } 420 421 void RemoveUnstableCPUInfoFields(std::vector<std::string>& cpu_info_fields) { 422 const std::vector<std::string> unstable_fields{"cpu MHz", "bogomips"}; 423 auto it = cpu_info_fields.begin(); 424 while (it != cpu_info_fields.end()) { 425 bool found = false; 426 for (const std::string& unstable_field : unstable_fields) { 427 if (it->find(unstable_field) != std::string::npos) { 428 found = true; 429 break; 430 } 431 } 432 if (found) { 433 it = cpu_info_fields.erase(it); 434 } else { 435 ++it; 436 } 437 } 438 } 439 440 TEST(ProcTest, NotFoundInRoot) { 441 struct stat s; 442 EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT)); 443 } 444 445 TEST(ProcSelfTest, IsThreadGroupLeader) { 446 ScopedThread([] { 447 const pid_t tgid = getpid(); 448 const pid_t tid = syscall(SYS_gettid); 449 EXPECT_NE(tgid, tid); 450 auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self")); 451 EXPECT_EQ(link, absl::StrCat(tgid)); 452 }); 453 } 454 455 TEST(ProcThreadSelfTest, Basic) { 456 const pid_t tgid = getpid(); 457 const pid_t tid = syscall(SYS_gettid); 458 EXPECT_EQ(tgid, tid); 459 auto link_threadself = 460 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); 461 EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); 462 // Just read one file inside thread-self to ensure that the link is valid. 
463 auto link_threadself_exe = 464 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); 465 auto link_procself_exe = 466 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); 467 EXPECT_EQ(link_threadself_exe, link_procself_exe); 468 } 469 470 TEST(ProcThreadSelfTest, Thread) { 471 ScopedThread([] { 472 const pid_t tgid = getpid(); 473 const pid_t tid = syscall(SYS_gettid); 474 EXPECT_NE(tgid, tid); 475 auto link_threadself = 476 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); 477 478 EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); 479 // Just read one file inside thread-self to ensure that the link is valid. 480 auto link_threadself_exe = 481 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); 482 auto link_procself_exe = 483 ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); 484 EXPECT_EQ(link_threadself_exe, link_procself_exe); 485 // A thread should not have "/proc/<tid>/task". 486 struct stat s; 487 EXPECT_THAT(stat("/proc/thread-self/task", &s), 488 SyscallFailsWithErrno(ENOENT)); 489 }); 490 } 491 492 // Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping 493 // m with start address addr and length len. 494 std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) { 495 return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-", 496 absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ", 497 prot & PROT_READ ? "r" : "-", 498 prot & PROT_WRITE ? "w" : "-", 499 prot & PROT_EXEC ? 
"x" : "-", "p 00000000 00:00 0 "); 500 } 501 502 std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { 503 return AnonymousMapsEntry(m.addr(), m.len(), prot); 504 } 505 506 PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() { 507 std::string auxv_file; 508 RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); 509 const Elf64_auxv_t* auxv_data = 510 reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data()); 511 std::map<uint64_t, uint64_t> auxv_entries; 512 for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { 513 auto a_type = auxv_data[i].a_type; 514 EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; 515 auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val); 516 } 517 return auxv_entries; 518 } 519 520 TEST(ProcSelfAuxv, EntryPresence) { 521 auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); 522 523 EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1); 524 EXPECT_EQ(auxv_entries.count(AT_PHDR), 1); 525 EXPECT_EQ(auxv_entries.count(AT_PHENT), 1); 526 EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1); 527 EXPECT_EQ(auxv_entries.count(AT_BASE), 1); 528 EXPECT_EQ(auxv_entries.count(AT_UID), 1); 529 EXPECT_EQ(auxv_entries.count(AT_EUID), 1); 530 EXPECT_EQ(auxv_entries.count(AT_GID), 1); 531 EXPECT_EQ(auxv_entries.count(AT_EGID), 1); 532 EXPECT_EQ(auxv_entries.count(AT_SECURE), 1); 533 EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1); 534 EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1); 535 EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1); 536 EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1); 537 EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1); 538 } 539 540 TEST(ProcSelfAuxv, EntryValues) { 541 auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); 542 543 // We need to find the ELF auxiliary vector. The section of memory pointed to 544 // by envp contains some pointers to non-null pointers, followed by a single 545 // pointer to a null pointer, followed by the auxiliary vector. 
546 char** envpi = environ; 547 while (*envpi) { 548 ++envpi; 549 } 550 551 const Elf64_auxv_t* envp_auxv = 552 reinterpret_cast<const Elf64_auxv_t*>(envpi + 1); 553 int i; 554 for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) { 555 auto a_type = envp_auxv[i].a_type; 556 EXPECT_EQ(proc_auxv.count(a_type), 1); 557 EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val) 558 << "a_type: " << a_type; 559 } 560 EXPECT_EQ(i, proc_auxv.size()); 561 } 562 563 // Just open and read a part of /proc/self/mem, check that we can read an item. 564 TEST(ProcPidMem, Read) { 565 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); 566 char input[] = "hello-world"; 567 char output[sizeof(input)]; 568 ASSERT_THAT(pread(memfd.get(), output, sizeof(output), 569 reinterpret_cast<off_t>(input)), 570 SyscallSucceedsWithValue(sizeof(input))); 571 ASSERT_STREQ(input, output); 572 } 573 574 // Perform read on an unmapped region. 575 TEST(ProcPidMem, Unmapped) { 576 // Strategy: map then unmap, so we have a guaranteed unmapped region 577 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); 578 Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE( 579 MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); 580 // Fill it with things 581 memset(mapping.ptr(), 'x', mapping.len()); 582 char expected = 'x', output; 583 ASSERT_THAT(pread(memfd.get(), &output, sizeof(output), 584 reinterpret_cast<off_t>(mapping.ptr())), 585 SyscallSucceedsWithValue(sizeof(output))); 586 ASSERT_EQ(expected, output); 587 588 const auto rest = [&] { 589 // This is a new process, so we need to re-open /proc/self/mem. 
590 int memfd = open("/proc/self/mem", O_RDONLY); 591 TEST_PCHECK_MSG(memfd >= 0, "open failed"); 592 // Unmap region again 593 TEST_PCHECK_MSG(MunmapSafe(mapping.ptr(), mapping.len()) == 0, 594 "munmap failed"); 595 // Now we want EIO error 596 TEST_CHECK(pread(memfd, &output, sizeof(output), 597 reinterpret_cast<off_t>(mapping.ptr())) == -1); 598 TEST_PCHECK_MSG(errno == EIO, "pread failed with unexpected errno"); 599 }; 600 601 EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); 602 } 603 604 // Perform read repeatedly to verify offset change. 605 TEST(ProcPidMem, RepeatedRead) { 606 auto const num_reads = 3; 607 char expected[] = "01234567890abcdefghijkl"; 608 char output[sizeof(expected) / num_reads]; 609 610 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); 611 ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET), 612 SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected))); 613 for (auto i = 0; i < num_reads; i++) { 614 ASSERT_THAT(read(memfd.get(), &output, sizeof(output)), 615 SyscallSucceedsWithValue(sizeof(output))); 616 ASSERT_EQ(strncmp(&expected[i * sizeof(output)], output, sizeof(output)), 617 0); 618 } 619 } 620 621 // Perform seek operations repeatedly. 
622 TEST(ProcPidMem, RepeatedSeek) { 623 auto const num_reads = 3; 624 char expected[] = "01234567890abcdefghijkl"; 625 char output[sizeof(expected) / num_reads]; 626 627 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); 628 ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET), 629 SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected))); 630 // Read from start 631 ASSERT_THAT(read(memfd.get(), &output, sizeof(output)), 632 SyscallSucceedsWithValue(sizeof(output))); 633 ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0); 634 // Skip ahead one read 635 ASSERT_THAT(lseek(memfd.get(), sizeof(output), SEEK_CUR), 636 SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected) + 637 sizeof(output) * 2)); 638 // Do read again 639 ASSERT_THAT(read(memfd.get(), &output, sizeof(output)), 640 SyscallSucceedsWithValue(sizeof(output))); 641 ASSERT_EQ(strncmp(&expected[2 * sizeof(output)], output, sizeof(output)), 0); 642 // Skip back three reads 643 ASSERT_THAT(lseek(memfd.get(), -3 * sizeof(output), SEEK_CUR), 644 SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected))); 645 // Do read again 646 ASSERT_THAT(read(memfd.get(), &output, sizeof(output)), 647 SyscallSucceedsWithValue(sizeof(output))); 648 ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0); 649 // Check that SEEK_END does not work 650 ASSERT_THAT(lseek(memfd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL)); 651 } 652 653 // Perform read past an allocated memory region. 654 TEST(ProcPidMem, PartialRead) { 655 // Reserve 2 pages. 656 Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE( 657 MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); 658 659 // Fill the first page with data. 
660 memset(mapping.ptr(), 'x', kPageSize); 661 662 char expected[] = {'x'}; 663 std::unique_ptr<char[]> output(new char[kPageSize]); 664 off_t read_offset = reinterpret_cast<off_t>(mapping.ptr()) + kPageSize - 1; 665 const auto rest = [&] { 666 int memfd = open("/proc/self/mem", O_RDONLY); 667 TEST_PCHECK_MSG(memfd >= 0, "open failed"); 668 // Unmap the second page. 669 TEST_PCHECK_MSG( 670 MunmapSafe(reinterpret_cast<void*>(mapping.addr() + kPageSize), 671 kPageSize) == 0, 672 "munmap failed"); 673 // Expect to read up to the end of the first page without getting EIO. 674 TEST_PCHECK_MSG( 675 pread(memfd, output.get(), kPageSize, read_offset) == sizeof(expected), 676 "pread failed"); 677 TEST_CHECK(expected[0] == output.get()[0]); 678 }; 679 680 EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); 681 } 682 683 // Perform read on /proc/[pid]/mem after exit. 684 TEST(ProcPidMem, AfterExit) { 685 int pfd1[2] = {}; 686 int pfd2[2] = {}; 687 688 char expected[] = "hello-world"; 689 690 ASSERT_THAT(pipe(pfd1), SyscallSucceeds()); 691 ASSERT_THAT(pipe(pfd2), SyscallSucceeds()); 692 693 // Create child process 694 pid_t const child_pid = fork(); 695 if (child_pid == 0) { 696 // Close reading end of first pipe 697 close(pfd1[0]); 698 699 // Tell parent about location of input 700 char ok = 1; 701 TEST_CHECK(WriteFd(pfd1[1], &ok, sizeof(ok)) == sizeof(ok)); 702 TEST_PCHECK(close(pfd1[1]) == 0); 703 704 // Close writing end of second pipe 705 TEST_PCHECK(close(pfd2[1]) == 0); 706 707 // Await parent OK to die 708 ok = 0; 709 TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok)); 710 711 // Close rest pipes 712 TEST_PCHECK(close(pfd2[0]) == 0); 713 _exit(0); 714 } 715 716 // In parent process. 
717 ASSERT_THAT(child_pid, SyscallSucceeds()); 718 719 // Close writing end of first pipe 720 EXPECT_THAT(close(pfd1[1]), SyscallSucceeds()); 721 722 // Wait for child to be alive and well 723 char ok = 0; 724 EXPECT_THAT(ReadFd(pfd1[0], &ok, sizeof(ok)), 725 SyscallSucceedsWithValue(sizeof(ok))); 726 // Close reading end of first pipe 727 EXPECT_THAT(close(pfd1[0]), SyscallSucceeds()); 728 729 // Open /proc/pid/mem fd 730 std::string mempath = absl::StrCat("/proc/", child_pid, "/mem"); 731 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY)); 732 733 // Expect that we can read 734 char output[sizeof(expected)]; 735 EXPECT_THAT(pread(memfd.get(), &output, sizeof(output), 736 reinterpret_cast<off_t>(&expected)), 737 SyscallSucceedsWithValue(sizeof(output))); 738 EXPECT_STREQ(expected, output); 739 740 // Tell proc its ok to go 741 EXPECT_THAT(close(pfd2[0]), SyscallSucceeds()); 742 ok = 1; 743 EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)), 744 SyscallSucceedsWithValue(sizeof(ok))); 745 EXPECT_THAT(close(pfd2[1]), SyscallSucceeds()); 746 747 // Expect termination 748 int status; 749 ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); 750 751 // Expect that we can't read anymore 752 EXPECT_THAT(pread(memfd.get(), &output, sizeof(output), 753 reinterpret_cast<off_t>(&expected)), 754 SyscallSucceedsWithValue(0)); 755 } 756 757 // Read from /proc/[pid]/mem with different UID/GID and attached state. 
758 TEST(ProcPidMem, DifferentUserAttached) { 759 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); 760 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_DAC_OVERRIDE))); 761 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE))); 762 763 int pfd1[2] = {}; 764 int pfd2[2] = {}; 765 766 ASSERT_THAT(pipe(pfd1), SyscallSucceeds()); 767 ASSERT_THAT(pipe(pfd2), SyscallSucceeds()); 768 769 // Create child process 770 pid_t const child_pid = fork(); 771 if (child_pid == 0) { 772 // Close reading end of first pipe 773 close(pfd1[0]); 774 775 // Tell parent about location of input 776 char input[] = "hello-world"; 777 off_t input_location = reinterpret_cast<off_t>(input); 778 TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) == 779 sizeof(input_location)); 780 TEST_PCHECK(close(pfd1[1]) == 0); 781 782 // Close writing end of second pipe 783 TEST_PCHECK(close(pfd2[1]) == 0); 784 785 // Await parent OK to die 786 char ok = 0; 787 TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok)); 788 789 // Close rest pipes 790 TEST_PCHECK(close(pfd2[0]) == 0); 791 _exit(0); 792 } 793 794 // In parent process. 
795 ASSERT_THAT(child_pid, SyscallSucceeds()); 796 797 // Close writing end of first pipe 798 EXPECT_THAT(close(pfd1[1]), SyscallSucceeds()); 799 800 // Read target location from child 801 off_t target_location; 802 EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)), 803 SyscallSucceedsWithValue(sizeof(target_location))); 804 // Close reading end of first pipe 805 EXPECT_THAT(close(pfd1[0]), SyscallSucceeds()); 806 807 ScopedThread([&] { 808 // Attach to child subprocess without stopping it 809 EXPECT_THAT(ptrace(PTRACE_SEIZE, child_pid, NULL, NULL), SyscallSucceeds()); 810 811 // Keep capabilities after setuid 812 EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); 813 constexpr int kNobody = 65534; 814 EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); 815 816 // Only restore CAP_SYS_PTRACE and CAP_DAC_OVERRIDE 817 EXPECT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, true)); 818 EXPECT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, true)); 819 820 // Open /proc/pid/mem fd 821 std::string mempath = absl::StrCat("/proc/", child_pid, "/mem"); 822 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY)); 823 char expected[] = "hello-world"; 824 char output[sizeof(expected)]; 825 EXPECT_THAT(pread(memfd.get(), output, sizeof(output), 826 reinterpret_cast<off_t>(target_location)), 827 SyscallSucceedsWithValue(sizeof(output))); 828 EXPECT_STREQ(expected, output); 829 830 // Tell proc its ok to go 831 EXPECT_THAT(close(pfd2[0]), SyscallSucceeds()); 832 char ok = 1; 833 EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)), 834 SyscallSucceedsWithValue(sizeof(ok))); 835 EXPECT_THAT(close(pfd2[1]), SyscallSucceeds()); 836 837 // Expect termination 838 int status; 839 ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); 840 EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) 841 << " status " << status; 842 }); 843 } 844 845 // Attempt to read from /proc/[pid]/mem with different UID/GID. 
846 TEST(ProcPidMem, DifferentUser) { 847 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); 848 849 int pfd1[2] = {}; 850 int pfd2[2] = {}; 851 852 ASSERT_THAT(pipe(pfd1), SyscallSucceeds()); 853 ASSERT_THAT(pipe(pfd2), SyscallSucceeds()); 854 855 // Create child process 856 pid_t const child_pid = fork(); 857 if (child_pid == 0) { 858 // Close reading end of first pipe 859 close(pfd1[0]); 860 861 // Tell parent about location of input 862 char input[] = "hello-world"; 863 off_t input_location = reinterpret_cast<off_t>(input); 864 TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) == 865 sizeof(input_location)); 866 TEST_PCHECK(close(pfd1[1]) == 0); 867 868 // Close writing end of second pipe 869 TEST_PCHECK(close(pfd2[1]) == 0); 870 871 // Await parent OK to die 872 char ok = 0; 873 TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok)); 874 875 // Close rest pipes 876 TEST_PCHECK(close(pfd2[0]) == 0); 877 _exit(0); 878 } 879 880 // In parent process. 
881 ASSERT_THAT(child_pid, SyscallSucceeds()); 882 883 // Close writing end of first pipe 884 EXPECT_THAT(close(pfd1[1]), SyscallSucceeds()); 885 886 // Read target location from child 887 off_t target_location; 888 EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)), 889 SyscallSucceedsWithValue(sizeof(target_location))); 890 // Close reading end of first pipe 891 EXPECT_THAT(close(pfd1[0]), SyscallSucceeds()); 892 893 ScopedThread([&] { 894 constexpr int kNobody = 65534; 895 EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); 896 897 // Attempt to open /proc/[child_pid]/mem 898 std::string mempath = absl::StrCat("/proc/", child_pid, "/mem"); 899 EXPECT_THAT(open(mempath.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES)); 900 901 // Tell proc its ok to go 902 EXPECT_THAT(close(pfd2[0]), SyscallSucceeds()); 903 char ok = 1; 904 EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)), 905 SyscallSucceedsWithValue(sizeof(ok))); 906 EXPECT_THAT(close(pfd2[1]), SyscallSucceeds()); 907 908 // Expect termination 909 int status; 910 ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); 911 }); 912 } 913 914 // Perform read on /proc/[pid]/mem with same UID/GID. 
915 TEST(ProcPidMem, SameUser) { 916 int pfd1[2] = {}; 917 int pfd2[2] = {}; 918 919 ASSERT_THAT(pipe(pfd1), SyscallSucceeds()); 920 ASSERT_THAT(pipe(pfd2), SyscallSucceeds()); 921 922 // Create child process 923 pid_t const child_pid = fork(); 924 if (child_pid == 0) { 925 // Close reading end of first pipe 926 close(pfd1[0]); 927 928 // Tell parent about location of input 929 char input[] = "hello-world"; 930 off_t input_location = reinterpret_cast<off_t>(input); 931 TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) == 932 sizeof(input_location)); 933 TEST_PCHECK(close(pfd1[1]) == 0); 934 935 // Close writing end of second pipe 936 TEST_PCHECK(close(pfd2[1]) == 0); 937 938 // Await parent OK to die 939 char ok = 0; 940 TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok)); 941 942 // Close rest pipes 943 TEST_PCHECK(close(pfd2[0]) == 0); 944 _exit(0); 945 } 946 // In parent process. 947 ASSERT_THAT(child_pid, SyscallSucceeds()); 948 949 // Close writing end of first pipe 950 EXPECT_THAT(close(pfd1[1]), SyscallSucceeds()); 951 952 // Read target location from child 953 off_t target_location; 954 EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)), 955 SyscallSucceedsWithValue(sizeof(target_location))); 956 // Close reading end of first pipe 957 EXPECT_THAT(close(pfd1[0]), SyscallSucceeds()); 958 959 // Open /proc/pid/mem fd 960 std::string mempath = absl::StrCat("/proc/", child_pid, "/mem"); 961 auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY)); 962 char expected[] = "hello-world"; 963 char output[sizeof(expected)]; 964 EXPECT_THAT(pread(memfd.get(), output, sizeof(output), 965 reinterpret_cast<off_t>(target_location)), 966 SyscallSucceedsWithValue(sizeof(output))); 967 EXPECT_STREQ(expected, output); 968 969 // Tell proc its ok to go 970 EXPECT_THAT(close(pfd2[0]), SyscallSucceeds()); 971 char ok = 1; 972 EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)), 973 SyscallSucceedsWithValue(sizeof(ok))); 974 
EXPECT_THAT(close(pfd2[1]), SyscallSucceeds()); 975 976 // Expect termination 977 int status; 978 ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); 979 } 980 981 // Just open and read /proc/self/maps, check that we can find [stack] 982 TEST(ProcSelfMaps, Basic) { 983 auto proc_self_maps = 984 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 985 986 std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); 987 std::vector<std::string> stacks; 988 // Make sure there's a stack in there. 989 for (const auto& str : strings) { 990 if (str.find("[stack]") != std::string::npos) { 991 stacks.push_back(str); 992 } 993 } 994 ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps; 995 // Linux pads to 73 characters then we add 7. 996 EXPECT_EQ(80, stacks[0].length()); 997 } 998 999 TEST(ProcSelfMaps, Map1) { 1000 Mapping mapping = 1001 ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE)); 1002 auto proc_self_maps = 1003 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1004 std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); 1005 std::vector<std::string> addrs; 1006 // Make sure if is listed. 1007 for (const auto& str : strings) { 1008 if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) { 1009 addrs.push_back(str); 1010 } 1011 } 1012 ASSERT_EQ(1, addrs.size()); 1013 } 1014 1015 TEST(ProcSelfMaps, Map2) { 1016 // NOTE(magi): The permissions must be different or the pages will get merged. 1017 Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( 1018 MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); 1019 Mapping map2 = 1020 ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); 1021 1022 auto proc_self_maps = 1023 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1024 std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); 1025 std::vector<std::string> addrs; 1026 // Make sure if is listed. 
1027 for (const auto& str : strings) { 1028 if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { 1029 addrs.push_back(str); 1030 } 1031 } 1032 ASSERT_EQ(1, addrs.size()); 1033 addrs.clear(); 1034 for (const auto& str : strings) { 1035 if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { 1036 addrs.push_back(str); 1037 } 1038 } 1039 ASSERT_EQ(1, addrs.size()); 1040 } 1041 1042 TEST(ProcSelfMaps, MapUnmap) { 1043 Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( 1044 MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); 1045 Mapping map2 = 1046 ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); 1047 1048 auto proc_self_maps = 1049 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1050 std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); 1051 std::vector<std::string> addrs; 1052 // Make sure if is listed. 1053 for (const auto& str : strings) { 1054 if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { 1055 addrs.push_back(str); 1056 } 1057 } 1058 ASSERT_EQ(1, addrs.size()) << proc_self_maps; 1059 addrs.clear(); 1060 for (const auto& str : strings) { 1061 if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { 1062 addrs.push_back(str); 1063 } 1064 } 1065 ASSERT_EQ(1, addrs.size()); 1066 1067 map2.reset(); 1068 1069 // Read it again. 1070 proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1071 strings = absl::StrSplit(proc_self_maps, '\n'); 1072 // First entry should be there. 1073 addrs.clear(); 1074 for (const auto& str : strings) { 1075 if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { 1076 addrs.push_back(str); 1077 } 1078 } 1079 ASSERT_EQ(1, addrs.size()); 1080 addrs.clear(); 1081 // But not the second. 
1082 for (const auto& str : strings) { 1083 if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { 1084 addrs.push_back(str); 1085 } 1086 } 1087 ASSERT_EQ(0, addrs.size()); 1088 } 1089 1090 TEST(ProcSelfMaps, Mprotect) { 1091 // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this 1092 // case. 1093 SKIP_IF(!IsRunningOnGvisor()); 1094 1095 // Reserve 5 pages of address space. 1096 Mapping m = ASSERT_NO_ERRNO_AND_VALUE( 1097 MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE)); 1098 1099 // Change the permissions on the middle 3 pages. (The first and last pages may 1100 // be merged with other vmas on either side, so they aren't tested directly; 1101 // they just ensure that the middle 3 pages are bracketed by VMAs with 1102 // incompatible permissions.) 1103 ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize), 1104 3 * kPageSize, PROT_READ), 1105 SyscallSucceeds()); 1106 1107 // Check that the middle 3 pages make up a single VMA. 1108 auto proc_self_maps = 1109 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1110 std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); 1111 EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, 1112 3 * kPageSize, PROT_READ))); 1113 1114 // Change the permissions on the middle page only. 1115 ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), 1116 kPageSize, PROT_READ | PROT_WRITE), 1117 SyscallSucceeds()); 1118 1119 // Check that the single VMA has been split into 3 VMAs. 
1120 proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1121 strings = absl::StrSplit(proc_self_maps, '\n'); 1122 EXPECT_THAT( 1123 strings, 1124 IsSupersetOf( 1125 {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ), 1126 AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize, 1127 PROT_READ | PROT_WRITE), 1128 AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize, 1129 PROT_READ)})); 1130 1131 // Change the permissions on the middle page back. 1132 ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), 1133 kPageSize, PROT_READ), 1134 SyscallSucceeds()); 1135 1136 // Check that the 3 VMAs have been merged back into a single VMA. 1137 proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1138 strings = absl::StrSplit(proc_self_maps, '\n'); 1139 EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, 1140 3 * kPageSize, PROT_READ))); 1141 } 1142 1143 TEST(ProcSelfMaps, SharedAnon) { 1144 const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( 1145 MmapAnon(kPageSize, PROT_READ, MAP_SHARED | MAP_ANONYMOUS)); 1146 1147 const auto proc_self_maps = 1148 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); 1149 for (const auto& line : absl::StrSplit(proc_self_maps, '\n')) { 1150 const auto entry = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMapsLine(line)); 1151 if (entry.start <= m.addr() && m.addr() < entry.end) { 1152 // cf. proc(5), "/proc/[pid]/map_files/" 1153 EXPECT_EQ(entry.filename, "/dev/zero (deleted)"); 1154 return; 1155 } 1156 } 1157 FAIL() << "no maps entry containing mapping at " << m.ptr(); 1158 } 1159 1160 TEST(ProcSelfFd, OpenFd) { 1161 int pipe_fds[2]; 1162 ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); 1163 1164 // Reopen the write end. 1165 const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]); 1166 const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY)); 1167 1168 // Ensure that a read/write works. 
1169 const std::string data = "hello"; 1170 std::unique_ptr<char[]> buffer(new char[data.size()]); 1171 EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), 1172 SyscallSucceedsWithValue(5)); 1173 EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()), 1174 SyscallSucceedsWithValue(5)); 1175 EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0); 1176 1177 // Cleanup. 1178 ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds()); 1179 ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds()); 1180 } 1181 1182 static void CheckFdDirGetdentsDuplicates(const std::string& path) { 1183 const FileDescriptor fd = 1184 ASSERT_NO_ERRNO_AND_VALUE(Open(path.c_str(), O_RDONLY | O_DIRECTORY)); 1185 // Open a FD whose value is supposed to be much larger than 1186 // the number of FDs opened by current process. 1187 auto newfd = fcntl(fd.get(), F_DUPFD, 1024); 1188 EXPECT_GE(newfd, 1024); 1189 auto fd_closer = Cleanup([newfd]() { close(newfd); }); 1190 auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir(path.c_str(), false)); 1191 absl::flat_hash_set<std::string> fd_files_dedup(fd_files.begin(), 1192 fd_files.end()); 1193 EXPECT_EQ(fd_files.size(), fd_files_dedup.size()); 1194 } 1195 1196 // This is a regression test for gvisor.dev/issues/3894 1197 TEST(ProcSelfFd, GetdentsDuplicates) { 1198 CheckFdDirGetdentsDuplicates("/proc/self/fd"); 1199 } 1200 1201 // This is a regression test for gvisor.dev/issues/3894 1202 TEST(ProcSelfFdInfo, GetdentsDuplicates) { 1203 CheckFdDirGetdentsDuplicates("/proc/self/fdinfo"); 1204 } 1205 1206 TEST(ProcSelfFdInfo, CorrectFds) { 1207 // Make sure there is at least one open file. 1208 auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 1209 const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); 1210 1211 // Get files in /proc/self/fd. 1212 auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false)); 1213 1214 // Get files in /proc/self/fdinfo. 
1215 auto fdinfo_files = 1216 ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false)); 1217 1218 // They should contain the same fds. 1219 EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files)); 1220 1221 // Both should contain fd. 1222 auto fd_s = absl::StrCat(fd.get()); 1223 EXPECT_THAT(fd_files, Contains(fd_s)); 1224 } 1225 1226 TEST(ProcSelfFdInfo, Flags) { 1227 std::string path = NewTempAbsPath(); 1228 1229 // Create file here with O_CREAT to test that O_CREAT does not appear in 1230 // fdinfo flags. 1231 int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC; 1232 const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644)); 1233 1234 // Automatically delete path. 1235 TempPath temp_path(path); 1236 1237 // O_CREAT does not appear in fdinfo flags. 1238 flags &= ~O_CREAT; 1239 1240 // O_LARGEFILE always appears (on x86_64). 1241 flags |= kOLargeFile; 1242 1243 auto fd_info = ASSERT_NO_ERRNO_AND_VALUE( 1244 GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get()))); 1245 EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags))); 1246 } 1247 1248 TEST(ProcSelfExe, Absolute) { 1249 auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); 1250 EXPECT_EQ(exe[0], '/'); 1251 } 1252 1253 TEST(ProcSelfCwd, Absolute) { 1254 auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/cwd")); 1255 EXPECT_EQ(exe[0], '/'); 1256 } 1257 1258 TEST(ProcSelfRoot, IsRoot) { 1259 auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/root")); 1260 EXPECT_EQ(exe, "/"); 1261 } 1262 1263 // Sanity check that /proc/cmdline is present. 1264 TEST(ProcCmdline, IsPresent) { 1265 std::string proc_cmdline = 1266 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cmdline")); 1267 ASSERT_FALSE(proc_cmdline.empty()); 1268 } 1269 1270 // Sanity check for /proc/cpuinfo fields that must be present. 
1271 TEST(ProcCpuinfo, RequiredFieldsArePresent) { 1272 std::string proc_cpuinfo = 1273 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); 1274 ASSERT_FALSE(proc_cpuinfo.empty()); 1275 1276 // Check that the usual fields are there. We don't really care about the 1277 // contents. 1278 for (const char* field : required_fields) { 1279 EXPECT_THAT(proc_cpuinfo, HasSubstr(field)); 1280 } 1281 } 1282 1283 TEST(ProcCpuinfo, DeniesWriteNonRoot) { 1284 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); 1285 1286 // Do setuid in a separate thread so that after finishing this test, the 1287 // process can still open files the test harness created before starting this 1288 // test. Otherwise, the files are created by root (UID before the test), but 1289 // cannot be opened by the `uid` set below after the test. After calling 1290 // setuid(non-zero-UID), there is no way to get root privileges back. 1291 ScopedThread([&] { 1292 // Use syscall instead of glibc setuid wrapper because we want this setuid 1293 // call to only apply to this task. POSIX threads, however, require that all 1294 // threads have the same UIDs, so using the setuid wrapper sets all threads' 1295 // real UID. 1296 // Also drops capabilities. 1297 constexpr int kNobody = 65534; 1298 EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); 1299 EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); 1300 EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EACCES)); 1301 }); 1302 } 1303 1304 // With root privileges, it is possible to open /proc/cpuinfo with write mode, 1305 // but all write operations should fail. 1306 TEST(ProcCpuinfo, DeniesWriteRoot) { 1307 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); 1308 1309 int fd; 1310 EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds()); 1311 if (fd > 0) { 1312 // Truncate is not tested--it may succeed on some kernels without doing 1313 // anything. 
1314 EXPECT_THAT(write(fd, "x", 1), SyscallFails()); 1315 EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFails()); 1316 } 1317 } 1318 1319 // Cpuinfo should not change across save/restore. 1320 TEST(ProcCpuinfo, Stable) { 1321 std::string output_before; 1322 ASSERT_NO_ERRNO(GetContents("/proc/cpuinfo", &output_before)); 1323 MaybeSave(); 1324 std::string output_after; 1325 ASSERT_NO_ERRNO(GetContents("/proc/cpuinfo", &output_after)); 1326 1327 std::vector<std::string> before_fields = absl::StrSplit(output_before, '\n'); 1328 std::vector<std::string> after_fields = absl::StrSplit(output_before, '\n'); 1329 RemoveUnstableCPUInfoFields(before_fields); 1330 RemoveUnstableCPUInfoFields(after_fields); 1331 1332 EXPECT_THAT(absl::StrJoin(before_fields, "\n"), 1333 Eq(absl::StrJoin(after_fields, "\n"))); 1334 } 1335 1336 // Sanity checks that uptime is present. 1337 TEST(ProcUptime, IsPresent) { 1338 std::string proc_uptime = 1339 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); 1340 ASSERT_FALSE(proc_uptime.empty()); 1341 std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); 1342 1343 // Parse once. 1344 double uptime0, uptime1, idletime0, idletime1; 1345 ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0)); 1346 ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0)); 1347 1348 // Sleep for one second. 1349 absl::SleepFor(absl::Seconds(1)); 1350 1351 // Parse again. 1352 proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); 1353 ASSERT_FALSE(proc_uptime.empty()); 1354 uptime_parts = absl::StrSplit(proc_uptime, ' '); 1355 ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1)); 1356 ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1)); 1357 1358 // Sanity check. 1359 // 1360 // We assert that between 0.99 and 59.99 seconds have passed. If more than a 1361 // minute has passed, then we must be executing really, really slowly. 
1362 EXPECT_GE(uptime0, 0.0); 1363 EXPECT_GE(idletime0, 0.0); 1364 EXPECT_GT(uptime1, uptime0); 1365 EXPECT_GE(uptime1, uptime0 + 0.99); 1366 EXPECT_LE(uptime1, uptime0 + 59.99); 1367 EXPECT_GE(idletime1, idletime0); 1368 } 1369 1370 TEST(ProcMeminfo, ContainsBasicFields) { 1371 std::string proc_meminfo = 1372 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); 1373 EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), 1374 ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); 1375 } 1376 1377 TEST(ProcSentryMeminfo, ContainsFieldsAndEndsWithNewline) { 1378 SKIP_IF(!IsRunningOnGvisor()); 1379 1380 std::string proc_sentry_meminfo = 1381 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sentry-meminfo")); 1382 1383 // Assert that all expected fields are present. 1384 EXPECT_THAT(proc_sentry_meminfo, 1385 AllOf(ContainsRegex(R"(Alloc:\s+[0-9]+ kB)"), 1386 ContainsRegex(R"(TotalAlloc:\s+[0-9]+ kB)"), 1387 ContainsRegex(R"(Sys:\s+[0-9]+ kB)"), 1388 ContainsRegex(R"(Mallocs:\s+[0-9]+)"), 1389 ContainsRegex(R"(Frees:\s+[0-9]+)"), 1390 ContainsRegex(R"(Live Objects:\s+[0-9]+)"), 1391 ContainsRegex(R"(HeapAlloc:\s+[0-9]+ kB)"), 1392 ContainsRegex(R"(HeapSys:\s+[0-9]+ kB)"), 1393 ContainsRegex(R"(HeapObjects:\s+[0-9]+)"))); 1394 1395 // Assert that /proc/sentry-meminfo ends with a new line. 
1396 EXPECT_EQ(proc_sentry_meminfo.back(), '\n'); 1397 } 1398 1399 TEST(ProcStat, ContainsBasicFields) { 1400 std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); 1401 1402 std::vector<std::string> names; 1403 for (auto const& line : absl::StrSplit(proc_stat, '\n')) { 1404 std::vector<std::string> fields = 1405 absl::StrSplit(line, ' ', absl::SkipWhitespace()); 1406 if (fields.empty()) { 1407 continue; 1408 } 1409 names.push_back(fields[0]); 1410 } 1411 1412 EXPECT_THAT(names, 1413 IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes", 1414 "procs_running", "procs_blocked", "softirq"})); 1415 } 1416 1417 TEST(ProcStat, EndsWithNewline) { 1418 std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); 1419 EXPECT_EQ(proc_stat.back(), '\n'); 1420 } 1421 1422 TEST(ProcStat, Fields) { 1423 std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); 1424 1425 std::vector<std::string> names; 1426 for (auto const& line : absl::StrSplit(proc_stat, '\n')) { 1427 std::vector<std::string> fields = 1428 absl::StrSplit(line, ' ', absl::SkipWhitespace()); 1429 if (fields.empty()) { 1430 continue; 1431 } 1432 1433 if (absl::StartsWith(fields[0], "cpu")) { 1434 // As of Linux 3.11, each CPU entry has 10 fields, plus the name. 1435 EXPECT_GE(fields.size(), 11) << proc_stat; 1436 } else if (fields[0] == "ctxt") { 1437 // Single field. 1438 EXPECT_EQ(fields.size(), 2) << proc_stat; 1439 } else if (fields[0] == "btime") { 1440 // Single field. 1441 EXPECT_EQ(fields.size(), 2) << proc_stat; 1442 } else if (fields[0] == "itime") { 1443 // Single field. 1444 ASSERT_EQ(fields.size(), 2) << proc_stat; 1445 // This is the only floating point field. 1446 double val; 1447 EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat; 1448 continue; 1449 } else if (fields[0] == "processes") { 1450 // Single field. 1451 EXPECT_EQ(fields.size(), 2) << proc_stat; 1452 } else if (fields[0] == "procs_running") { 1453 // Single field. 
1454 EXPECT_EQ(fields.size(), 2) << proc_stat; 1455 } else if (fields[0] == "procs_blocked") { 1456 // Single field. 1457 EXPECT_EQ(fields.size(), 2) << proc_stat; 1458 } else if (fields[0] == "softirq") { 1459 // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total. 1460 EXPECT_GE(fields.size(), 12) << proc_stat; 1461 } 1462 1463 // All fields besides itime are valid base 10 numbers. 1464 for (size_t i = 1; i < fields.size(); i++) { 1465 uint64_t val; 1466 EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; 1467 } 1468 } 1469 } 1470 1471 TEST(ProcLoadavg, EndsWithNewline) { 1472 std::string proc_loadvg = 1473 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); 1474 EXPECT_EQ(proc_loadvg.back(), '\n'); 1475 } 1476 1477 TEST(ProcLoadavg, Fields) { 1478 std::string proc_loadvg = 1479 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); 1480 std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); 1481 1482 // Single line. 1483 EXPECT_EQ(lines.size(), 2) << proc_loadvg; 1484 1485 std::vector<std::string> fields = 1486 absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace()); 1487 1488 // Six fields. 1489 EXPECT_EQ(fields.size(), 6) << proc_loadvg; 1490 1491 double val; 1492 uint64_t val2; 1493 // First three fields are floating point numbers. 1494 EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; 1495 EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; 1496 EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg; 1497 // Rest of the fields are valid base 10 numbers. 1498 EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg; 1499 EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg; 1500 EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg; 1501 } 1502 1503 // NOTE: Tests in priority.cc also check certain priority related fields in 1504 // /proc/self/stat. 
1505 1506 class ProcPidStatTest : public ::testing::TestWithParam<std::string> {}; 1507 1508 // Parses /proc/<pid>/stat output to a vector of string. We need a more 1509 // complicated approach than absl::StrSplit because COMM can contain spaces. 1510 PosixErrorOr<std::vector<std::string>> ParseProcPidStat( 1511 absl::string_view proc_pid_stat) { 1512 auto comm_start = proc_pid_stat.find('('); 1513 auto comm_end = proc_pid_stat.rfind(')'); 1514 if (comm_start == proc_pid_stat.npos || comm_end == proc_pid_stat.npos) { 1515 return PosixError(EINVAL, absl::StrCat("Invalid /proc/<pid>/stat")); 1516 } 1517 std::vector<std::string> fields = 1518 absl::StrSplit(proc_pid_stat.substr(0, comm_start - 1), ' '); 1519 fields.push_back(std::string{proc_pid_stat.substr(comm_start, comm_end + 1)}); 1520 absl::c_transform(absl::StrSplit(proc_pid_stat.substr(comm_end + 2), ' '), 1521 std::back_inserter(fields), 1522 [](auto sv) { return std::string{sv}; }); 1523 return fields; 1524 } 1525 1526 TEST_P(ProcPidStatTest, HasBasicFields) { 1527 std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE( 1528 GetContents(absl::StrCat("/proc/", GetParam(), "/stat"))); 1529 1530 ASSERT_FALSE(proc_pid_stat.empty()); 1531 std::vector<std::string> fields = 1532 ASSERT_NO_ERRNO_AND_VALUE(ParseProcPidStat(proc_pid_stat)); 1533 1534 ASSERT_GE(fields.size(), 24); 1535 EXPECT_EQ(absl::StrCat(getpid()), fields[0]); 1536 // fields[1] is the thread name. 1537 EXPECT_EQ("R", fields[2]); // task state 1538 EXPECT_EQ(absl::StrCat(getppid()), fields[3]); 1539 1540 // If the test starts up quickly, then the process start time and the kernel 1541 // boot time will be very close, and the proc starttime field (which is the 1542 // delta of the two times) will be 0. For that unfortunate reason, we can 1543 // only check that starttime >= 0, and not that it is strictly > 0. 
1544 uint64_t starttime; 1545 ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime)); 1546 EXPECT_GE(starttime, 0); 1547 1548 uint64_t vss; 1549 ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); 1550 EXPECT_GT(vss, 0); 1551 1552 uint64_t rss; 1553 ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); 1554 EXPECT_GT(rss, 0); 1555 1556 uint64_t rsslim; 1557 ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim)); 1558 EXPECT_GT(rsslim, 0); 1559 } 1560 1561 INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest, 1562 ::testing::Values("self", absl::StrCat(getpid()))); 1563 1564 using ProcPidStatmTest = ::testing::TestWithParam<std::string>; 1565 1566 TEST_P(ProcPidStatmTest, HasBasicFields) { 1567 std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE( 1568 GetContents(absl::StrCat("/proc/", GetParam(), "/statm"))); 1569 ASSERT_FALSE(proc_pid_statm.empty()); 1570 std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' '); 1571 ASSERT_GE(fields.size(), 7); 1572 1573 uint64_t vss; 1574 ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); 1575 EXPECT_GT(vss, 0); 1576 1577 uint64_t rss; 1578 ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); 1579 EXPECT_GT(rss, 0); 1580 } 1581 1582 INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest, 1583 ::testing::Values("self", absl::StrCat(getpid()))); 1584 1585 PosixErrorOr<uint64_t> CurrentRSS() { 1586 ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); 1587 if (proc_self_stat.empty()) { 1588 return PosixError(EINVAL, "empty /proc/self/stat"); 1589 } 1590 1591 ASSIGN_OR_RETURN_ERRNO(std::vector<std::string> fields, 1592 ParseProcPidStat(proc_self_stat)); 1593 if (fields.size() < 24) { 1594 return PosixError( 1595 EINVAL, 1596 absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); 1597 } 1598 1599 uint64_t rss; 1600 if (!absl::SimpleAtoi(fields[23], &rss)) { 1601 return PosixError( 1602 EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", 1603 fields[23])); 1604 } 1605 1606 
// RSS is given in number of pages. 1607 return rss * kPageSize; 1608 } 1609 1610 // The size of mapping created by MapPopulateRSS. 1611 constexpr uint64_t kMappingSize = 100 << 20; 1612 1613 // Tolerance on RSS comparisons to account for background thread mappings, 1614 // reclaimed pages, newly faulted pages, etc. 1615 constexpr uint64_t kRSSTolerance = 10 << 20; 1616 1617 // Capture RSS before and after an anonymous mapping with passed prot. 1618 void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { 1619 *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); 1620 1621 // N.B. The kernel asynchronously accumulates per-task RSS counters into the 1622 // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization 1623 // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on 1624 // another thread to ensure it is reflected in RSS after the thread exits. 1625 Mapping mapping; 1626 ScopedThread t([&mapping, prot] { 1627 mapping = ASSERT_NO_ERRNO_AND_VALUE( 1628 MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE)); 1629 }); 1630 t.Join(); 1631 1632 *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); 1633 } 1634 1635 // TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their 1636 // semantics are more subtle: 1637 // 1638 // Small pages -> Zero page mapped, not counted in RSS 1639 // (mm/memory.c:do_anonymous_page). 1640 // 1641 // Huge pages (THP enabled, use_zero_page=0) -> Pages committed 1642 // (mm/memory.c:__handle_mm_fault -> create_huge_pmd). 1643 // 1644 // Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in 1645 // RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page). 1646 1647 // PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. 1648 TEST(ProcSelfStat, PopulateWriteRSS) { 1649 uint64_t before, after; 1650 MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); 1651 1652 // Mapping is committed. 
1653 EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance); 1654 } 1655 1656 // PROT_NONE + MAP_POPULATE anonymous mappings are never committed. 1657 TEST(ProcSelfStat, PopulateNoneRSS) { 1658 uint64_t before, after; 1659 MapPopulateRSS(PROT_NONE, &before, &after); 1660 1661 // Mapping not committed. 1662 EXPECT_NEAR(before, after, kRSSTolerance); 1663 } 1664 1665 // Returns the calling thread's name. 1666 PosixErrorOr<std::string> ThreadName() { 1667 // "The buffer should allow space for up to 16 bytes; the returned std::string 1668 // will be null-terminated if it is shorter than that." - prctl(2). But we 1669 // always want the thread name to be null-terminated. 1670 char thread_name[17]; 1671 int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0); 1672 MaybeSave(); 1673 if (rc < 0) { 1674 return PosixError(errno, "prctl(PR_GET_NAME)"); 1675 } 1676 thread_name[16] = '\0'; 1677 return std::string(thread_name); 1678 } 1679 1680 // Parses the contents of a /proc/[pid]/status file into a collection of 1681 // key-value pairs. 
1682 PosixErrorOr<absl::btree_map<std::string, std::string>> ParseProcStatus( 1683 absl::string_view status_str) { 1684 absl::btree_map<std::string, std::string> fields; 1685 for (absl::string_view const line : 1686 absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { 1687 const std::pair<absl::string_view, absl::string_view> kv = 1688 absl::StrSplit(line, absl::MaxSplits(":\t", 1)); 1689 if (kv.first.empty()) { 1690 return PosixError( 1691 EINVAL, absl::StrCat("failed to parse key in line \"", line, "\"")); 1692 } 1693 std::string key(kv.first); 1694 if (fields.count(key)) { 1695 return PosixError(EINVAL, 1696 absl::StrCat("duplicate key \"", kv.first, "\"")); 1697 } 1698 std::string value(kv.second); 1699 absl::StripLeadingAsciiWhitespace(&value); 1700 fields.emplace(std::move(key), std::move(value)); 1701 } 1702 return fields; 1703 } 1704 1705 TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) { 1706 EXPECT_THAT( 1707 ParseProcStatus( 1708 "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"), 1709 IsPosixErrorOkAndHolds(UnorderedElementsAre( 1710 Pair("Name", "init"), Pair("State", "S (sleeping)"), 1711 Pair("CapEff", "0000001fffffffff")))); 1712 } 1713 1714 TEST(ParseProcStatusTest, DetectsDuplicateKeys) { 1715 auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n"); 1716 EXPECT_THAT(proc_status_or, 1717 PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\""))); 1718 } 1719 1720 TEST(ParseProcStatusTest, DetectsMissingTabs) { 1721 EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"), 1722 IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""), 1723 Pair("Pid: 1", "")))); 1724 } 1725 1726 TEST(ProcPidStatusTest, HasBasicFields) { 1727 // Do this on a separate thread since we want tgid != tid. 
1728 ScopedThread([] { 1729 const pid_t tgid = getpid(); 1730 const pid_t tid = syscall(SYS_gettid); 1731 EXPECT_NE(tgid, tid); 1732 const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName()); 1733 1734 std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( 1735 GetContents(absl::StrCat("/proc/", tid, "/status"))); 1736 1737 ASSERT_FALSE(status_str.empty()); 1738 const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); 1739 EXPECT_THAT(status, IsSupersetOf({ 1740 Pair("Name", thread_name), 1741 Pair("Tgid", absl::StrCat(tgid)), 1742 Pair("Pid", absl::StrCat(tid)), 1743 Pair("PPid", absl::StrCat(getppid())), 1744 })); 1745 1746 uid_t ruid, euid, suid; 1747 ASSERT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds()); 1748 gid_t rgid, egid, sgid; 1749 ASSERT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds()); 1750 std::vector<gid_t> supplementary_gids; 1751 int ngids = getgroups(0, nullptr); 1752 supplementary_gids.resize(ngids); 1753 ASSERT_THAT(getgroups(ngids, supplementary_gids.data()), SyscallSucceeds()); 1754 1755 EXPECT_THAT( 1756 status, 1757 IsSupersetOf(std::vector< 1758 ::testing::Matcher<std::pair<std::string, std::string>>>{ 1759 // gVisor doesn't support fsuid/gid, and even if it did there is 1760 // no getfsuid/getfsgid(). 1761 Pair("Uid", 1762 StartsWith(absl::StrFormat("%d\t%d\t%d\t", ruid, euid, suid))), 1763 Pair("Gid", 1764 StartsWith(absl::StrFormat("%d\t%d\t%d\t", rgid, egid, sgid))), 1765 // ParseProcStatus strips leading whitespace for each value, 1766 // so if the Groups line is empty then the trailing space is 1767 // stripped. 1768 Pair("Groups", StartsWith(absl::StrJoin(supplementary_gids, " "))), 1769 })); 1770 }); 1771 } 1772 1773 TEST(ProcPidStatusTest, StateRunning) { 1774 // Task must be running when reading the file. 
1775 const pid_t tid = syscall(SYS_gettid); 1776 std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( 1777 GetContents(absl::StrCat("/proc/", tid, "/status"))); 1778 1779 EXPECT_THAT(ParseProcStatus(status_str), 1780 IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)")))); 1781 } 1782 1783 TEST(ProcPidStatusTest, StateSleeping) { 1784 // Starts a child process that blocks and checks that State is sleeping. 1785 auto res = WithSubprocess( 1786 [&](int pid) -> PosixError { 1787 // Because this test is timing based we will disable cooperative saving 1788 // and the test itself also has random saving disabled. 1789 const DisableSave ds; 1790 // Try multiple times in case the child isn't sleeping when status file 1791 // is read. 1792 MonotonicTimer timer; 1793 timer.Start(); 1794 for (;;) { 1795 ASSIGN_OR_RETURN_ERRNO( 1796 std::string status_str, 1797 GetContents(absl::StrCat("/proc/", pid, "/status"))); 1798 ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str)); 1799 if (map["State"] == std::string("S (sleeping)")) { 1800 // Test passed! 1801 return NoError(); 1802 } 1803 if (timer.Duration() > absl::Seconds(10)) { 1804 return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep"); 1805 } 1806 absl::SleepFor(absl::Milliseconds(10)); 1807 } 1808 }, 1809 nullptr, nullptr); 1810 ASSERT_NO_ERRNO(res); 1811 } 1812 1813 TEST(ProcPidStatusTest, ValuesAreTabDelimited) { 1814 std::string status_str = 1815 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); 1816 ASSERT_FALSE(status_str.empty()); 1817 for (absl::string_view const line : 1818 absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { 1819 EXPECT_NE(std::string::npos, line.find(":\t")); 1820 } 1821 } 1822 1823 // Threads properly counts running threads. 1824 // 1825 // TODO(mpratt): Test zombied threads while the thread group leader is still 1826 // running with generalized fork and clone children from the wait test. 
1827 TEST(ProcPidStatusTest, Threads) { 1828 char buf[4096] = {}; 1829 EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1), 1830 SyscallSucceedsWithValue(Gt(0))); 1831 1832 auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); 1833 auto it = status.find("Threads"); 1834 ASSERT_NE(it, status.end()); 1835 int threads = -1; 1836 EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) 1837 << "Threads value " << it->second << " is not a number"; 1838 // Don't make assumptions about the exact number of threads, as it may not be 1839 // constant. 1840 EXPECT_GE(threads, 1); 1841 1842 memset(buf, 0, sizeof(buf)); 1843 EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1), 1844 SyscallSucceedsWithValue(Gt(0))); 1845 1846 status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); 1847 it = status.find("Threads"); 1848 ASSERT_NE(it, status.end()); 1849 threads = -1; 1850 EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) 1851 << "Threads value " << it->second << " is not a number"; 1852 // There must be only the thread group leader remaining, zombied. 1853 EXPECT_EQ(threads, 1); 1854 } 1855 1856 // Returns true if all characters in s are digits. 1857 bool IsDigits(absl::string_view s) { 1858 return std::all_of(s.begin(), s.end(), absl::ascii_isdigit); 1859 } 1860 1861 TEST(ProcPidStatTest, VmStats) { 1862 std::string status_str = 1863 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); 1864 ASSERT_FALSE(status_str.empty()); 1865 auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); 1866 1867 const auto vss_it = status.find("VmSize"); 1868 ASSERT_NE(vss_it, status.end()); 1869 1870 absl::string_view vss_str(vss_it->second); 1871 1872 // Room for the " kB" suffix plus at least one digit. 1873 ASSERT_GT(vss_str.length(), 3); 1874 EXPECT_TRUE(absl::EndsWith(vss_str, " kB")); 1875 // Everything else is part of a number. 1876 EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str; 1877 // ... which is not 0. 
1878 EXPECT_NE('0', vss_str[0]); 1879 1880 const auto rss_it = status.find("VmRSS"); 1881 ASSERT_NE(rss_it, status.end()); 1882 1883 absl::string_view rss_str(rss_it->second); 1884 1885 // Room for the " kB" suffix plus at least one digit. 1886 ASSERT_GT(rss_str.length(), 3); 1887 EXPECT_TRUE(absl::EndsWith(rss_str, " kB")); 1888 // Everything else is part of a number. 1889 EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str; 1890 // ... which is not 0. 1891 EXPECT_NE('0', rss_str[0]); 1892 1893 const auto data_it = status.find("VmData"); 1894 ASSERT_NE(data_it, status.end()); 1895 1896 absl::string_view data_str(data_it->second); 1897 1898 // Room for the " kB" suffix plus at least one digit. 1899 ASSERT_GT(data_str.length(), 3); 1900 EXPECT_TRUE(absl::EndsWith(data_str, " kB")); 1901 // Everything else is part of a number. 1902 EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str; 1903 // ... which is not 0. 1904 EXPECT_NE('0', data_str[0]); 1905 } 1906 1907 // Parse an array of NUL-terminated char* arrays, returning a vector of 1908 // strings. 1909 std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { 1910 EXPECT_EQ('\0', contents.back()); 1911 // The split will leave an empty string if the NUL-byte remains, so pop 1912 // it. 1913 contents.pop_back(); 1914 1915 return absl::StrSplit(contents, '\0'); 1916 } 1917 1918 TEST(ProcPidCmdline, MatchesArgv) { 1919 std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings( 1920 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); 1921 EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline)); 1922 } 1923 1924 TEST(ProcPidEnviron, MatchesEnviron) { 1925 std::vector<std::string> proc_environ = ParseNulTerminatedStrings( 1926 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ"))); 1927 // Get the environment from the environ variable, which we will compare with 1928 // /proc/self/environ. 
1929 std::vector<std::string> env; 1930 for (char** v = environ; *v; v++) { 1931 env.push_back(*v); 1932 } 1933 EXPECT_THAT(env, ContainerEq(proc_environ)); 1934 } 1935 1936 TEST(ProcPidCmdline, SubprocessForkSameCmdline) { 1937 std::vector<std::string> proc_cmdline_parent; 1938 std::vector<std::string> proc_cmdline; 1939 proc_cmdline_parent = ParseNulTerminatedStrings( 1940 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); 1941 auto res = WithSubprocess( 1942 [&](int pid) -> PosixError { 1943 ASSIGN_OR_RETURN_ERRNO( 1944 auto raw_cmdline, 1945 GetContents(absl::StrCat("/proc/", pid, "/cmdline"))); 1946 proc_cmdline = ParseNulTerminatedStrings(raw_cmdline); 1947 return NoError(); 1948 }, 1949 nullptr, nullptr); 1950 ASSERT_NO_ERRNO(res); 1951 1952 for (size_t i = 0; i < proc_cmdline_parent.size(); i++) { 1953 EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]); 1954 } 1955 } 1956 1957 TEST(ProcPidCmdline, SubprocessSeekCmdline) { 1958 FileDescriptor fd; 1959 ASSERT_NO_ERRNO(WithSubprocess( 1960 [&](int pid) -> PosixError { 1961 // Running. Open /proc/pid/cmdline. 1962 ASSIGN_OR_RETURN_ERRNO( 1963 fd, Open(absl::StrCat("/proc/", pid, "/cmdline"), O_RDONLY)); 1964 return NoError(); 1965 }, 1966 [&](int pid) -> PosixError { 1967 // Zombie, but seek should still succeed. 1968 int ret = lseek(fd.get(), 0x801, 0); 1969 if (ret < 0) { 1970 return PosixError(errno); 1971 } 1972 return NoError(); 1973 }, 1974 [&](int pid) -> PosixError { 1975 // Exited. 1976 int ret = lseek(fd.get(), 0x801, 0); 1977 if (ret < 0) { 1978 return PosixError(errno); 1979 } 1980 return NoError(); 1981 })); 1982 } 1983 1984 // Test whether /proc/PID/ symlinks can be read for a running process. 
1985 TEST(ProcPidSymlink, SubprocessRunning) { 1986 char buf[1]; 1987 1988 EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)), 1989 SyscallSucceedsWithValue(sizeof(buf))); 1990 1991 EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)), 1992 SyscallSucceedsWithValue(sizeof(buf))); 1993 1994 EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)), 1995 SyscallSucceedsWithValue(sizeof(buf))); 1996 1997 EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)), 1998 SyscallSucceedsWithValue(sizeof(buf))); 1999 } 2000 2001 TEST(ProcPidSymlink, SubprocessZombied) { 2002 AutoCapability cap1(CAP_DAC_OVERRIDE, false); 2003 AutoCapability cap2(CAP_DAC_READ_SEARCH, false); 2004 2005 char buf[1]; 2006 2007 int want = EACCES; 2008 if (!IsRunningOnGvisor()) { 2009 auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); 2010 if (version.major > 4 || (version.major == 4 && version.minor > 3)) { 2011 want = ENOENT; 2012 } 2013 } 2014 2015 EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)), 2016 SyscallFailsWithErrno(want)); 2017 2018 if (!IsRunningOnGvisor()) { 2019 EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)), 2020 SyscallFailsWithErrno(want)); 2021 } 2022 2023 // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc 2024 // files. 2025 // 2026 // ~4.3: Syscall fails with EACCES. 2027 // 4.17: Syscall succeeds and returns 1. 2028 // 2029 if (!IsRunningOnGvisor()) { 2030 return; 2031 } 2032 2033 EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), 2034 SyscallFailsWithErrno(want)); 2035 2036 EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), 2037 SyscallFailsWithErrno(want)); 2038 } 2039 2040 // Test whether /proc/PID/ symlinks can be read for an exited process. 
2041 TEST(ProcPidSymlink, SubprocessExited) { 2042 char buf[1]; 2043 2044 EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), 2045 SyscallFailsWithErrno(ESRCH)); 2046 2047 EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)), 2048 SyscallFailsWithErrno(ESRCH)); 2049 2050 EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)), 2051 SyscallFailsWithErrno(ESRCH)); 2052 2053 EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)), 2054 SyscallFailsWithErrno(ESRCH)); 2055 } 2056 2057 // /proc/PID/exe points to the correct binary. 2058 TEST(ProcPidExe, Subprocess) { 2059 auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); 2060 auto expected_absolute_path = 2061 ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, "")); 2062 2063 char actual[PATH_MAX + 1] = {}; 2064 ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)), 2065 SyscallSucceedsWithValue(Gt(0))); 2066 EXPECT_EQ(actual, expected_absolute_path); 2067 } 2068 2069 // /proc/PID/cwd points to the correct directory. 2070 TEST(ProcPidCwd, Subprocess) { 2071 auto want = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); 2072 2073 char got[PATH_MAX + 1] = {}; 2074 ASSERT_THAT(ReadlinkWhileRunning("cwd", got, sizeof(got)), 2075 SyscallSucceedsWithValue(Gt(0))); 2076 EXPECT_EQ(got, want); 2077 } 2078 2079 // /proc/PID/root points to the correct directory. 2080 TEST(ProcPidRoot, Subprocess) { 2081 char got[PATH_MAX + 1] = {}; 2082 ASSERT_THAT(ReadlinkWhileRunning("root", got, sizeof(got)), 2083 SyscallSucceedsWithValue(Gt(0))); 2084 EXPECT_STREQ(got, "/"); 2085 } 2086 2087 // Test whether /proc/PID/ files can be read for a running process. 
2088 TEST(ProcPidFile, SubprocessRunning) { 2089 char buf[1]; 2090 2091 EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)), 2092 SyscallSucceedsWithValue(sizeof(buf))); 2093 2094 EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)), 2095 SyscallSucceedsWithValue(sizeof(buf))); 2096 2097 EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)), 2098 SyscallSucceedsWithValue(sizeof(buf))); 2099 2100 EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)), 2101 SyscallSucceedsWithValue(sizeof(buf))); 2102 2103 EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)), 2104 SyscallSucceedsWithValue(sizeof(buf))); 2105 2106 EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)), 2107 SyscallSucceedsWithValue(sizeof(buf))); 2108 2109 EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)), 2110 SyscallSucceedsWithValue(sizeof(buf))); 2111 2112 EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)), 2113 SyscallSucceedsWithValue(sizeof(buf))); 2114 2115 EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), 2116 SyscallSucceedsWithValue(sizeof(buf))); 2117 2118 EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)), 2119 SyscallSucceedsWithValue(sizeof(buf))); 2120 2121 EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)), 2122 SyscallSucceedsWithValue(sizeof(buf))); 2123 } 2124 2125 // Test whether /proc/PID/ files can be read for a zombie process. 2126 TEST(ProcPidFile, SubprocessZombie) { 2127 char buf[1]; 2128 2129 // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent 2130 // behavior on different kernels. 2131 // 2132 // ~4.3: Succeds and returns 0. 2133 // 4.17: Succeeds and returns 1. 2134 // gVisor: Succeeds and returns 0. 
2135 EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); 2136 2137 EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), 2138 SyscallSucceedsWithValue(0)); 2139 2140 EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)), 2141 SyscallSucceedsWithValue(sizeof(buf))); 2142 2143 EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)), 2144 SyscallSucceedsWithValue(sizeof(buf))); 2145 2146 EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)), 2147 SyscallSucceedsWithValue(0)); 2148 2149 EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)), 2150 SyscallSucceedsWithValue(sizeof(buf))); 2151 2152 EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)), 2153 SyscallSucceedsWithValue(sizeof(buf))); 2154 2155 EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), 2156 SyscallSucceedsWithValue(sizeof(buf))); 2157 2158 EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), 2159 SyscallSucceedsWithValue(sizeof(buf))); 2160 2161 EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), 2162 SyscallSucceedsWithValue(sizeof(buf))); 2163 2164 // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux 2165 // on proc files. 2166 // 2167 // ~4.3: Fails and returns EACCES. 2168 // gVisor & 4.17: Succeeds and returns 1. 2169 // 2170 // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), 2171 // SyscallFailsWithErrno(EACCES)); 2172 } 2173 2174 // Test whether /proc/PID/ files can be read for an exited process. 2175 TEST(ProcPidFile, SubprocessExited) { 2176 char buf[1]; 2177 2178 // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. 2179 // 2180 // ~4.3: Fails and returns ESRCH. 2181 // gVisor: Fails with ESRCH. 2182 // 4.17: Succeeds and returns 1. 
2183 // 2184 // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), 2185 // SyscallFailsWithErrno(ESRCH)); 2186 2187 EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)), 2188 SyscallFailsWithErrno(ESRCH)); 2189 2190 if (!IsRunningOnGvisor()) { 2191 // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 2192 EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)), 2193 SyscallFailsWithErrno(ESRCH)); 2194 } 2195 2196 EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)), 2197 SyscallSucceedsWithValue(sizeof(buf))); 2198 2199 if (!IsRunningOnGvisor()) { 2200 // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 2201 EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)), 2202 SyscallFailsWithErrno(ESRCH)); 2203 } 2204 2205 if (!IsRunningOnGvisor()) { 2206 // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor. 2207 EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)), 2208 SyscallFailsWithErrno(ESRCH)); 2209 } 2210 2211 if (!IsRunningOnGvisor()) { 2212 // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 2213 EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)), 2214 SyscallFailsWithErrno(ESRCH)); 2215 } 2216 2217 if (!IsRunningOnGvisor()) { 2218 // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 2219 EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)), 2220 SyscallFailsWithErrno(ESRCH)); 2221 } 2222 2223 EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), 2224 SyscallSucceedsWithValue(sizeof(buf))); 2225 2226 if (!IsRunningOnGvisor()) { 2227 // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 
2228 EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), 2229 SyscallFailsWithErrno(ESRCH)); 2230 } 2231 2232 EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), 2233 SyscallFailsWithErrno(ESRCH)); 2234 } 2235 2236 PosixError DirContains(absl::string_view path, 2237 const std::vector<std::string>& expect, 2238 const std::vector<std::string>& exclude) { 2239 ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); 2240 2241 for (auto& expected_entry : expect) { 2242 auto cursor = std::find(listing.begin(), listing.end(), expected_entry); 2243 if (cursor == listing.end()) { 2244 return PosixError( 2245 ENOENT, 2246 absl::StrCat("Failed to find one or more paths in '", path, "'")); 2247 } 2248 } 2249 for (auto& excluded_entry : exclude) { 2250 auto cursor = std::find(listing.begin(), listing.end(), excluded_entry); 2251 if (cursor != listing.end()) { 2252 return PosixError(ENOENT, absl::StrCat("File '", excluded_entry, 2253 "' found in path '", path, "'")); 2254 } 2255 } 2256 return NoError(); 2257 } 2258 2259 PosixError EventuallyDirContains(absl::string_view path, 2260 const std::vector<std::string>& expect, 2261 const std::vector<std::string>& exclude) { 2262 constexpr int kRetryCount = 100; 2263 const absl::Duration kRetryDelay = absl::Milliseconds(100); 2264 2265 for (int i = 0; i < kRetryCount; ++i) { 2266 auto res = DirContains(path, expect, exclude); 2267 if (res.ok()) { 2268 return res; 2269 } else if (i < kRetryCount - 1) { 2270 // Sleep if this isn't the final iteration. 
2271 absl::SleepFor(kRetryDelay); 2272 } 2273 } 2274 return PosixError(ETIMEDOUT, 2275 "Timed out while waiting for directory to contain files "); 2276 } 2277 2278 std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { 2279 return ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, 2280 pids); 2281 } 2282 2283 TEST(ProcTask, Basic) { 2284 EXPECT_NO_ERRNO( 2285 DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())}, {})); 2286 } 2287 2288 // Helper class for creating a new task in the current thread group. 2289 class BlockingChild { 2290 public: 2291 BlockingChild() : thread_([=] { Start(); }) {} 2292 ~BlockingChild() { Join(); } 2293 2294 pid_t Tid() const { 2295 absl::MutexLock ml(&mu_); 2296 mu_.Await(absl::Condition(&tid_ready_)); 2297 return tid_; 2298 } 2299 2300 void Join() { 2301 { 2302 absl::MutexLock ml(&mu_); 2303 stop_ = true; 2304 } 2305 thread_.Join(); 2306 } 2307 2308 private: 2309 void Start() { 2310 absl::MutexLock ml(&mu_); 2311 tid_ = syscall(__NR_gettid); 2312 tid_ready_ = true; 2313 mu_.Await(absl::Condition(&stop_)); 2314 } 2315 2316 mutable absl::Mutex mu_; 2317 bool stop_ ABSL_GUARDED_BY(mu_) = false; 2318 pid_t tid_; 2319 bool tid_ready_ ABSL_GUARDED_BY(mu_) = false; 2320 2321 // Must be last to ensure that the destructor for the thread is run before 2322 // any other member of the object is destroyed. 2323 ScopedThread thread_; 2324 }; 2325 2326 TEST(ProcTask, NewThreadAppears) { 2327 BlockingChild child1; 2328 EXPECT_NO_ERRNO( 2329 DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {})); 2330 } 2331 2332 TEST(ProcTask, KilledThreadsDisappear) { 2333 BlockingChild child1; 2334 EXPECT_NO_ERRNO( 2335 DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {})); 2336 2337 // Stat child1's task file. Regression test for b/32097707. 
2338 struct stat statbuf; 2339 const std::string child1_task_file = 2340 absl::StrCat("/proc/self/task/", child1.Tid()); 2341 EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds()); 2342 2343 BlockingChild child2; 2344 EXPECT_NO_ERRNO(DirContains("/proc/self/task", 2345 TaskFiles({child1.Tid(), child2.Tid()}), {})); 2346 2347 BlockingChild child3; 2348 BlockingChild child4; 2349 BlockingChild child5; 2350 EXPECT_NO_ERRNO( 2351 DirContains("/proc/self/task", 2352 TaskFiles({child1.Tid(), child2.Tid(), child3.Tid(), 2353 child4.Tid(), child5.Tid()}), 2354 {})); 2355 2356 child2.Join(); 2357 EXPECT_NO_ERRNO(EventuallyDirContains( 2358 "/proc/self/task", 2359 TaskFiles({child1.Tid(), child3.Tid(), child4.Tid(), child5.Tid()}), 2360 TaskFiles({child2.Tid()}))); 2361 2362 child1.Join(); 2363 child4.Join(); 2364 EXPECT_NO_ERRNO(EventuallyDirContains( 2365 "/proc/self/task", TaskFiles({child3.Tid(), child5.Tid()}), 2366 TaskFiles({child2.Tid(), child1.Tid(), child4.Tid()}))); 2367 2368 // Stat child1's task file again. This time it should fail. See b/32097707. 
2369 EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), 2370 SyscallFailsWithErrno(ENOENT)); 2371 2372 child3.Join(); 2373 child5.Join(); 2374 EXPECT_NO_ERRNO( 2375 EventuallyDirContains("/proc/self/task", {}, 2376 TaskFiles({child2.Tid(), child1.Tid(), child4.Tid(), 2377 child3.Tid(), child5.Tid()}))); 2378 } 2379 2380 TEST(ProcTask, ChildTaskDir) { 2381 BlockingChild child1; 2382 EXPECT_NO_ERRNO( 2383 DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {})); 2384 EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"), 2385 TaskFiles({child1.Tid()}), {})); 2386 } 2387 2388 PosixError VerifyPidDir(std::string path) { 2389 return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"}, 2390 {}); 2391 } 2392 2393 TEST(ProcTask, VerifyTaskDir) { 2394 EXPECT_NO_ERRNO(VerifyPidDir("/proc/self")); 2395 2396 EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid()))); 2397 BlockingChild child1; 2398 EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid()))); 2399 2400 // Only the first level of task directories should contain the 'task' 2401 // directory. That is: 2402 // 2403 // /proc/1234/task <- should exist 2404 // /proc/1234/task/1234/task <- should not exist 2405 // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same 2406 // thread group as 1234). 
2407 EXPECT_NO_ERRNO( 2408 DirContains(absl::StrCat("/proc/self/task/", getpid()), {}, {"task"})); 2409 } 2410 2411 TEST(ProcTask, VerifyTaskChildren) { 2412 auto path = JoinPath("/proc", absl::StrCat(getpid()), "task", 2413 absl::StrCat(gettid()), "children"); 2414 EXPECT_THAT(access(path.c_str(), F_OK), SyscallSucceeds()); 2415 2416 int pid1 = -1, status1 = -1; 2417 auto cleanup1 = 2418 ForkAndExec("/bin/sleep", {"sleep", "100"}, {}, nullptr, &pid1, &status1); 2419 ASSERT_GT(pid1, 0); 2420 ASSERT_EQ(status1, 0); 2421 2422 auto proc_children_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path)); 2423 EXPECT_EQ(absl::StrCat(pid1, " "), proc_children_file); 2424 2425 int pid2 = -1, status2 = -1; 2426 auto cleanup2 = 2427 ForkAndExec("/bin/sleep", {"sleep", "100"}, {}, nullptr, &pid2, &status2); 2428 ASSERT_GT(pid2, 0); 2429 ASSERT_EQ(status2, 0); 2430 2431 proc_children_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path)); 2432 2433 // /children contains space-separated sorted list of thread Ids of children. 2434 std::string expectedContent; 2435 if (pid1 < pid2) { 2436 expectedContent = absl::StrCat(pid1, " ", pid2, " "); 2437 } else { 2438 expectedContent = absl::StrCat(pid2, " ", pid1, " "); 2439 } 2440 EXPECT_EQ(expectedContent, proc_children_file); 2441 } 2442 2443 TEST(ProcTask, TaskDirCannotBeDeleted) { 2444 // Drop capabilities that allow us to override file and directory permissions. 2445 AutoCapability cap(CAP_DAC_OVERRIDE, false); 2446 2447 EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails()); 2448 EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()), 2449 SyscallFailsWithErrno(EACCES)); 2450 } 2451 2452 TEST(ProcTask, TaskDirHasCorrectMetadata) { 2453 struct stat st; 2454 EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds()); 2455 EXPECT_TRUE(S_ISDIR(st.st_mode)); 2456 2457 // Verify file is readable and executable by everyone. 
2458 mode_t expected_permissions = 2459 S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; 2460 mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); 2461 EXPECT_EQ(expected_permissions, permissions); 2462 } 2463 2464 TEST(ProcTask, TaskDirCanSeekToEnd) { 2465 const FileDescriptor dirfd = 2466 ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY)); 2467 EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds()); 2468 } 2469 2470 TEST(ProcTask, VerifyTaskDirNlinks) { 2471 const auto fn = [] { 2472 // A task directory will have 3 links if the taskgroup has a single 2473 // thread. For example, the following shows where the links to 2474 // '/proc/12345/task' comes from for a single threaded process with pid 2475 // 12345: 2476 // 2477 // /proc/12345/task <-- 1 link for the directory itself 2478 // . <-- link from "." 2479 // .. 2480 // 12345 2481 // . 2482 // .. <-- link from ".." to parent. 2483 // <other contents of a task dir> 2484 // 2485 // We can't assert an absolute number of links since we don't control how 2486 // many threads the test framework spawns. Instead, we'll ensure creating a 2487 // new thread increases the number of links as expected. 2488 2489 // Once we reach the test body, we can count on the thread count being 2490 // stable unless we spawn a new one. 2491 const uint64_t initial_links = 2492 TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); 2493 TEST_CHECK(initial_links >= 3); 2494 2495 // For each new subtask, we should gain a new link. 2496 BlockingChild child1; 2497 uint64_t links = TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); 2498 TEST_CHECK(links == initial_links + 1); 2499 2500 BlockingChild child2; 2501 links = TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); 2502 TEST_CHECK(links == initial_links + 2); 2503 }; 2504 // Run as a forked process to prevent terminating tasks from other tests to 2505 // show up here and race with the count. 
2506 EXPECT_THAT(InForkedProcess(fn), IsPosixErrorOkAndHolds(0)); 2507 } 2508 2509 TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) { 2510 constexpr char kThreadName[] = "TestThread12345"; 2511 ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds()); 2512 2513 auto thread_name = ASSERT_NO_ERRNO_AND_VALUE( 2514 GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task", 2515 absl::StrCat(syscall(SYS_gettid)), "comm"))); 2516 EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name); 2517 } 2518 2519 TEST(ProcTask, CommCanSetSelfThreadName) { 2520 auto path = JoinPath("/proc", absl::StrCat(getpid()), "task", 2521 absl::StrCat(syscall(SYS_gettid)), "comm"); 2522 constexpr char kThreadName[] = "TestThread12345"; 2523 ASSERT_NO_ERRNO(SetContents(path, kThreadName)); 2524 2525 auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path)); 2526 EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name); 2527 } 2528 2529 TEST(ProcTask, CommCanSetPeerThreadName) { 2530 constexpr char kThreadName[] = "TestThread12345"; 2531 2532 // Path correspond to *this* thread's tid. We will changed it from the new 2533 // thread created below. 2534 auto path = JoinPath("/proc", absl::StrCat(getpid()), "task", 2535 absl::StrCat(syscall(SYS_gettid)), "comm"); 2536 2537 // Start a thread that will set this parent threads name. 2538 ScopedThread peer_thread( 2539 [&]() { ASSERT_NO_ERRNO(SetContents(path, kThreadName)); }); 2540 2541 peer_thread.Join(); 2542 2543 // Our thread name should have been updated. 2544 auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path)); 2545 EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name); 2546 } 2547 2548 TEST(ProcTask, CommCannotSetAnotherProcessThreadName) { 2549 // Path correspond to *this* thread's pid and tid. 
2550 auto path = JoinPath("/proc", absl::StrCat(getpid()), "task", 2551 absl::StrCat(syscall(SYS_gettid)), "comm"); 2552 2553 auto rest = [&] { 2554 // New process is allowed to open the file, even for writing, since the 2555 // owning user is the same. 2556 int fd; 2557 TEST_CHECK_SUCCESS(fd = open(path.c_str(), O_WRONLY)); 2558 2559 // Write gets EINVAL since the thread group is different. See Linux 2560 // fs/proc/base.c:comm_write. 2561 TEST_CHECK_ERRNO(write(fd, "x", 1), EINVAL); 2562 }; 2563 2564 EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); 2565 } 2566 2567 TEST(ProcTask, CommLenLimited) { 2568 auto path = JoinPath("/proc", absl::StrCat(getpid()), "task", 2569 absl::StrCat(syscall(SYS_gettid)), "comm"); 2570 // comm is limited by 15 symbols (TASK_COMM_LEN). 2571 constexpr char kThreadName[] = "0123456789abcde"; 2572 ASSERT_NO_ERRNO(SetContents(path, absl::StrCat(kThreadName, "XYZ"))); 2573 2574 auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path)); 2575 EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name); 2576 } 2577 2578 TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) { 2579 EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"}, {})); 2580 2581 // Let's just test the 'pid' entry, all of them are very similar. 
2582 struct stat st; 2583 EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds()); 2584 EXPECT_TRUE(S_ISLNK(st.st_mode)); 2585 2586 auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid")); 2587 EXPECT_THAT(link, ::testing::StartsWith("pid:[")); 2588 } 2589 2590 TEST(ProcTaskNs, AccessOnNsNodeSucceeds) { 2591 EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds()); 2592 } 2593 2594 TEST(ProcSysKernelHostname, Exists) { 2595 EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds()); 2596 } 2597 2598 TEST(ProcSysKernelHostname, MatchesUname) { 2599 struct utsname buf; 2600 EXPECT_THAT(uname(&buf), SyscallSucceeds()); 2601 const std::string hostname = absl::StrCat(buf.nodename, "\n"); 2602 auto procfs_hostname = 2603 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname")); 2604 EXPECT_EQ(procfs_hostname, hostname); 2605 } 2606 2607 TEST(ProcSysVmMaxmapCount, HasNumericValue) { 2608 const std::string val_str = 2609 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/max_map_count")); 2610 int32_t val; 2611 EXPECT_TRUE(absl::SimpleAtoi(val_str, &val)) 2612 << "/proc/sys/vm/max_map_count does not contain a numeric value: " 2613 << val_str; 2614 } 2615 2616 TEST(ProcSysVmMmapMinAddr, HasNumericValue) { 2617 const std::string mmap_min_addr_str = 2618 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr")); 2619 uintptr_t mmap_min_addr; 2620 EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr)) 2621 << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: " 2622 << mmap_min_addr_str; 2623 } 2624 2625 TEST(ProcSysVmOvercommitMemory, HasNumericValue) { 2626 const std::string overcommit_memory_str = 2627 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory")); 2628 uintptr_t overcommit_memory; 2629 EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory)) 2630 << "/proc/sys/vm/overcommit_memory does not contain a numeric value: " 2631 << 
overcommit_memory; 2632 } 2633 2634 // Check that link for proc fd entries point the target node, not the 2635 // symlink itself. Regression test for b/31155070. 2636 TEST(ProcTaskFd, FstatatFollowsSymlink) { 2637 const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 2638 const FileDescriptor fd = 2639 ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); 2640 2641 struct stat sproc = {}; 2642 EXPECT_THAT( 2643 fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0), 2644 SyscallSucceeds()); 2645 2646 struct stat sfile = {}; 2647 EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds()); 2648 2649 // If fstatat follows the fd symlink, the device and inode numbers should 2650 // match at a minimum. 2651 EXPECT_EQ(sproc.st_dev, sfile.st_dev); 2652 EXPECT_EQ(sproc.st_ino, sfile.st_ino); 2653 EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile))); 2654 } 2655 2656 TEST(ProcFilesystems, Bug65172365) { 2657 std::string proc_filesystems = 2658 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems")); 2659 ASSERT_FALSE(proc_filesystems.empty()); 2660 } 2661 2662 // Check that /proc/mounts is a symlink to self/mounts. 2663 TEST(ProcMounts, IsSymlink) { 2664 auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts")); 2665 EXPECT_EQ(link, "self/mounts"); 2666 } 2667 2668 TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { 2669 auto mountinfo = 2670 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); 2671 EXPECT_THAT( 2672 mountinfo, 2673 AllOf( 2674 // Root mount. 2675 ContainsRegex( 2676 R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), 2677 // Proc mount - always rw. 
2678 ContainsRegex( 2679 R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); 2680 } 2681 2682 TEST(ProcSelfMountinfo, ContainsProcfsEntry) { 2683 const std::vector<ProcMountInfoEntry> entries = 2684 ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountInfoEntries()); 2685 bool found = false; 2686 for (const auto& e : entries) { 2687 if (e.fstype == "proc") { 2688 found = true; 2689 break; 2690 } 2691 } 2692 EXPECT_TRUE(found); 2693 } 2694 2695 // Check that /proc/self/mounts looks something like a real mounts file. 2696 TEST(ProcSelfMounts, RequiredFieldsArePresent) { 2697 auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); 2698 EXPECT_THAT(mounts, 2699 AllOf( 2700 // Root mount. 2701 ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"), 2702 // Root mount. 2703 ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)"))); 2704 } 2705 2706 TEST(ProcSelfMounts, ContainsProcfsEntry) { 2707 const std::vector<ProcMountsEntry> entries = 2708 ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountsEntries()); 2709 bool found = false; 2710 for (const auto& e : entries) { 2711 if (e.fstype == "proc") { 2712 found = true; 2713 break; 2714 } 2715 } 2716 EXPECT_TRUE(found); 2717 } 2718 2719 void CheckDuplicatesRecursively(std::string path) { 2720 std::vector<std::string> child_dirs; 2721 2722 // There is the known issue of the linux procfs, that two consequent calls of 2723 // readdir can return the same entry twice if between these calls one or more 2724 // entries have been removed from this directory. 2725 int max_attempts = 5; 2726 for (int i = 0; i < max_attempts; i++) { 2727 child_dirs.clear(); 2728 errno = 0; 2729 bool success = true; 2730 DIR* dir = opendir(path.c_str()); 2731 if (dir == nullptr) { 2732 // Ignore any directories we can't read or missing directories as the 2733 // directory could have been deleted/mutated from the time the parent 2734 // directory contents were read. 
2735 return; 2736 } 2737 auto dir_closer = Cleanup([&dir]() { closedir(dir); }); 2738 absl::flat_hash_set<std::string> children; 2739 while (true) { 2740 // Readdir(3): If the end of the directory stream is reached, NULL is 2741 // returned and errno is not changed. If an error occurs, NULL is 2742 // returned and errno is set appropriately. To distinguish end of stream 2743 // and from an error, set errno to zero before calling readdir() and then 2744 // check the value of errno if NULL is returned. 2745 errno = 0; 2746 struct dirent* dp = readdir(dir); 2747 if (dp == nullptr) { 2748 // Linux will return EINVAL when calling getdents on a /proc/tid/net 2749 // file corresponding to a zombie task. 2750 // See fs/proc/proc_net.c:proc_tgid_net_readdir(). 2751 // 2752 // We just ignore the directory in this case. 2753 if (errno == EINVAL && absl::StartsWith(path, "/proc/") && 2754 absl::EndsWith(path, "/net")) { 2755 break; 2756 } 2757 // We may also see permission failures traversing some files. 2758 if (errno == EACCES && absl::StartsWith(path, "/proc/")) { 2759 break; 2760 } 2761 2762 // Otherwise, no errors are allowed. 2763 ASSERT_EQ(errno, 0) << path; 2764 break; // We're done. 2765 } 2766 2767 const std::string name = dp->d_name; 2768 2769 if (name == "." || name == "..") { 2770 continue; 2771 } 2772 2773 // Ignore a duplicate entry if it isn't the last attempt. 
2774 if (i == max_attempts - 1) { 2775 ASSERT_EQ(children.find(name), children.end()) 2776 << absl::StrCat(path, "/", name); 2777 } else if (children.find(name) != children.end()) { 2778 std::cerr << "Duplicate entry: " << i << ":" 2779 << absl::StrCat(path, "/", name) << std::endl; 2780 success = false; 2781 break; 2782 } 2783 children.insert(name); 2784 2785 if (dp->d_type == DT_DIR) { 2786 child_dirs.push_back(name); 2787 } 2788 } 2789 if (success) { 2790 break; 2791 } 2792 } 2793 for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) { 2794 CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname)); 2795 } 2796 } 2797 2798 TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); } 2799 2800 // Most /proc/PID files are owned by the task user with SUID_DUMP_USER. 2801 TEST(ProcPid, UserDumpableOwner) { 2802 int before; 2803 ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); 2804 auto cleanup = Cleanup([before] { 2805 ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); 2806 }); 2807 2808 EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); 2809 2810 // This applies to the task directory itself and files inside. 2811 struct stat st; 2812 ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); 2813 EXPECT_EQ(st.st_uid, geteuid()); 2814 EXPECT_EQ(st.st_gid, getegid()); 2815 2816 ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); 2817 EXPECT_EQ(st.st_uid, geteuid()); 2818 EXPECT_EQ(st.st_gid, getegid()); 2819 } 2820 2821 // /proc/PID files are owned by root with SUID_DUMP_DISABLE. 
2822 TEST(ProcPid, RootDumpableOwner) { 2823 int before; 2824 ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); 2825 auto cleanup = Cleanup([before] { 2826 ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); 2827 }); 2828 2829 EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); 2830 2831 // This *does not* applies to the task directory itself (or other 0555 2832 // directories), but does to files inside. 2833 struct stat st; 2834 ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); 2835 EXPECT_EQ(st.st_uid, geteuid()); 2836 EXPECT_EQ(st.st_gid, getegid()); 2837 2838 // This file is owned by root. Also allow nobody in case this test is running 2839 // in a userns without root mapped. 2840 ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); 2841 EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534))); 2842 EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534))); 2843 } 2844 2845 TEST(Proc, GetdentsEnoent) { 2846 FileDescriptor fd; 2847 ASSERT_NO_ERRNO(WithSubprocess( 2848 [&](int pid) -> PosixError { 2849 // Running. 2850 ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"), 2851 O_RDONLY | O_DIRECTORY)); 2852 2853 return NoError(); 2854 }, 2855 nullptr, nullptr)); 2856 char buf[1024]; 2857 ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), 2858 SyscallFailsWithErrno(ENOENT)); 2859 } 2860 2861 void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { 2862 std::string output; 2863 ASSERT_NO_ERRNO(GetContents(path, &output)); 2864 ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); 2865 } 2866 2867 // Checks that there is variable accounting of IO between threads/tasks. 2868 TEST(Proc, PidTidIOAccounting) { 2869 absl::Notification notification; 2870 2871 // Run a thread with a bunch of writes. Check that io account records exactly 2872 // the number of write calls. File open/close is there to prevent buffering. 
2873 ScopedThread writer([¬ification] { 2874 const int num_writes = 100; 2875 for (int i = 0; i < num_writes; i++) { 2876 auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); 2877 ASSERT_NO_ERRNO(SetContents(path.path(), "a")); 2878 } 2879 notification.Notify(); 2880 const std::string& writer_dir = 2881 absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); 2882 2883 CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); 2884 }); 2885 2886 // Run a thread and do no writes. Check that no writes are recorded. 2887 ScopedThread noop([¬ification] { 2888 notification.WaitForNotification(); 2889 const std::string& noop_dir = 2890 absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); 2891 2892 CheckSyscwFromIOFile(noop_dir, "0"); 2893 }); 2894 2895 writer.Join(); 2896 noop.Join(); 2897 } 2898 2899 TEST(Proc, Statfs) { 2900 struct statfs st; 2901 EXPECT_THAT(statfs("/proc", &st), SyscallSucceeds()); 2902 EXPECT_EQ(st.f_type, PROC_SUPER_MAGIC); 2903 EXPECT_EQ(st.f_bsize, getpagesize()); 2904 EXPECT_EQ(st.f_namelen, NAME_MAX); 2905 } 2906 2907 // Tests that /proc/[pid]/fd/[num] can resolve to a path inside /proc. 2908 TEST(Proc, ResolveSymlinkToProc) { 2909 const auto proc = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/cmdline", 0)); 2910 const auto path = JoinPath("/proc/self/fd/", absl::StrCat(proc.get())); 2911 const auto target = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(path)); 2912 EXPECT_EQ(target, JoinPath("/proc/", absl::StrCat(getpid()), "/cmdline")); 2913 } 2914 2915 // NOTE(b/236035339): Tests that opening /proc/[pid]/fd/[eventFDNum] with 2916 // O_DIRECTORY leads to ENOTDIR. 
2917 TEST(Proc, RegressionTestB236035339) { 2918 FileDescriptor efd = 2919 ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_CLOEXEC)); 2920 const auto path = JoinPath("/proc/self/fd/", absl::StrCat(efd.get())); 2921 EXPECT_THAT(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECTORY), 2922 SyscallFailsWithErrno(ENOTDIR)); 2923 } 2924 2925 // NOTE(b/338393279): Tests that after execve() from a non-leader thread 2926 // changes which thread owns the thread group ID, the new thread group leader 2927 // can access its /proc/self. 2928 TEST(Proc, PidReuse) { 2929 const ExecveArray owned_child_argv = {"/proc/self/exe", 2930 "--proc_pid_reuse_child"}; 2931 char* const* const child_argv = owned_child_argv.get(); 2932 2933 const auto rest = [child_argv] { 2934 struct stat statbuf; 2935 TEST_PCHECK(stat("/proc/self/cwd", &statbuf) == 0); 2936 2937 ScopedThread([child_argv] { 2938 execve(child_argv[0], child_argv, /* envp = */ nullptr); 2939 TEST_PCHECK_MSG(false, "Survived execve to test child"); 2940 }); 2941 }; 2942 EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); 2943 } 2944 2945 [[noreturn]] void RunProcPidReuseChild() { 2946 struct stat statbuf; 2947 TEST_PCHECK(stat("/proc/self/cwd", &statbuf) == 0); 2948 _exit(0); 2949 } 2950 2951 TEST(ProcFilesystems, ReadCapLastCap) { 2952 std::string lastCapStr = 2953 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/cap_last_cap")); 2954 2955 uint64_t lastCap; 2956 ASSERT_TRUE(absl::SimpleAtoi(lastCapStr, &lastCap)); 2957 EXPECT_TRUE(lastCap > 32 && lastCap < 64); 2958 } 2959 2960 TEST(ProcFilesystems, OverflowID) { 2961 std::string overflowGidStr = 2962 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/overflowgid")); 2963 std::string overflowUidStr = 2964 ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/overflowuid")); 2965 uint64_t overflowGid, overflowUid; 2966 ASSERT_TRUE(absl::SimpleAtoi(overflowGidStr, &overflowGid)); 2967 ASSERT_TRUE(absl::SimpleAtoi(overflowUidStr, &overflowUid)); 
2968 2969 const uint64_t defaultOverflowID = 65534; 2970 EXPECT_EQ(overflowGid, defaultOverflowID); 2971 EXPECT_EQ(overflowUid, defaultOverflowID); 2972 } 2973 2974 } // namespace 2975 } // namespace testing 2976 } // namespace gvisor 2977 2978 int main(int argc, char** argv) { 2979 for (int i = 0; i < argc; ++i) { 2980 gvisor::testing::saved_argv.emplace_back(std::string(argv[i])); 2981 } 2982 2983 gvisor::testing::TestInit(&argc, &argv); 2984 2985 if (absl::GetFlag(FLAGS_proc_pid_reuse_child)) { 2986 gvisor::testing::RunProcPidReuseChild(); 2987 } 2988 2989 return gvisor::testing::RunAllTests(); 2990 }