github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/fork.cc (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <sched.h> 18 #include <stdlib.h> 19 #include <sys/mman.h> 20 #include <sys/stat.h> 21 #include <sys/types.h> 22 #include <unistd.h> 23 24 #include <atomic> 25 #include <cstdlib> 26 27 #include "gtest/gtest.h" 28 #include "absl/time/clock.h" 29 #include "absl/time/time.h" 30 #include "test/util/capability_util.h" 31 #include "test/util/logging.h" 32 #include "test/util/memory_util.h" 33 #include "test/util/test_util.h" 34 #include "test/util/thread_util.h" 35 36 namespace gvisor { 37 namespace testing { 38 39 namespace { 40 41 using ::testing::Ge; 42 43 class ForkTest : public ::testing::Test { 44 protected: 45 // SetUp creates a populated, open file. 46 void SetUp() override { 47 // Make a shared mapping. 48 shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, 49 MAP_SHARED | MAP_ANONYMOUS, -1, 0)); 50 ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED); 51 52 // Make a private mapping. 53 private_ = 54 reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, 55 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); 56 ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED); 57 58 // Make a pipe. 59 ASSERT_THAT(pipe(pipes_), SyscallSucceeds()); 60 } 61 62 // TearDown frees associated resources. 63 void TearDown() override { 64 EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds()); 65 EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds()); 66 EXPECT_THAT(close(pipes_[0]), SyscallSucceeds()); 67 EXPECT_THAT(close(pipes_[1]), SyscallSucceeds()); 68 } 69 70 // Fork executes a clone system call. 71 pid_t Fork() { 72 pid_t pid = fork(); 73 MaybeSave(); 74 TEST_PCHECK_MSG(pid >= 0, "fork failed"); 75 return pid; 76 } 77 78 // Wait waits for the given pid and returns the exit status. If the child was 79 // killed by a signal or an error occurs, then 256+signal is returned. 80 int Wait(pid_t pid) { 81 int status; 82 while (true) { 83 int rval = wait4(pid, &status, 0, NULL); 84 if (rval < 0) { 85 return rval; 86 } 87 if (rval != pid) { 88 continue; 89 } 90 if (WIFEXITED(status)) { 91 return WEXITSTATUS(status); 92 } 93 if (WIFSIGNALED(status)) { 94 return 256 + WTERMSIG(status); 95 } 96 } 97 } 98 99 // Exit exits the proccess. 100 void Exit(int code) { 101 _exit(code); 102 103 // Should never reach here. Since the exit above failed, we really don't 104 // have much in the way of options to indicate failure. So we just try to 105 // log an assertion failure to the logs. The parent process will likely 106 // fail anyways if exit is not working. 107 TEST_CHECK_MSG(false, "_exit returned"); 108 } 109 110 // ReadByte reads a byte from the shared pipe. 111 char ReadByte() { 112 char val = -1; 113 TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1); 114 MaybeSave(); 115 return val; 116 } 117 118 // WriteByte writes a byte from the shared pipe. 119 void WriteByte(char val) { 120 TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1); 121 MaybeSave(); 122 } 123 124 // Shared pipe. 125 int pipes_[2]; 126 127 // Shared mapping (one page). 128 char* shared_; 129 130 // Private mapping (one page). 131 char* private_; 132 }; 133 134 TEST_F(ForkTest, Simple) { 135 pid_t child = Fork(); 136 if (child == 0) { 137 Exit(0); 138 } 139 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 140 } 141 142 TEST_F(ForkTest, ExitCode) { 143 pid_t child = Fork(); 144 if (child == 0) { 145 Exit(123); 146 } 147 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123)); 148 child = Fork(); 149 if (child == 0) { 150 Exit(1); 151 } 152 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1)); 153 } 154 155 TEST_F(ForkTest, Multi) { 156 pid_t child1 = Fork(); 157 if (child1 == 0) { 158 Exit(0); 159 } 160 pid_t child2 = Fork(); 161 if (child2 == 0) { 162 Exit(1); 163 } 164 EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); 165 EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1)); 166 } 167 168 TEST_F(ForkTest, Pipe) { 169 pid_t child = Fork(); 170 if (child == 0) { 171 WriteByte(1); 172 Exit(0); 173 } 174 EXPECT_EQ(ReadByte(), 1); 175 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 176 } 177 178 TEST_F(ForkTest, SharedMapping) { 179 pid_t child = Fork(); 180 if (child == 0) { 181 // Wait for the parent. 182 ReadByte(); 183 if (shared_[0] == 1) { 184 Exit(0); 185 } 186 // Failed. 187 Exit(1); 188 } 189 // Change the mapping. 190 ASSERT_EQ(shared_[0], 0); 191 shared_[0] = 1; 192 // Unblock the child. 193 WriteByte(0); 194 // Did it work? 195 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 196 } 197 198 TEST_F(ForkTest, PrivateMapping) { 199 pid_t child = Fork(); 200 if (child == 0) { 201 // Wait for the parent. 202 ReadByte(); 203 if (private_[0] == 0) { 204 Exit(0); 205 } 206 // Failed. 207 Exit(1); 208 } 209 // Change the mapping. 210 ASSERT_EQ(private_[0], 0); 211 private_[0] = 1; 212 // Unblock the child. 213 WriteByte(0); 214 // Did it work? 215 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 216 } 217 218 // CPUID is x86 specific. 219 #ifdef __x86_64__ 220 // Test that cpuid works after a fork. 221 TEST_F(ForkTest, Cpuid) { 222 pid_t child = Fork(); 223 224 // We should be able to determine the CPU vendor. 225 ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor); 226 227 if (child == 0) { 228 Exit(0); 229 } 230 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 231 } 232 #endif 233 234 TEST_F(ForkTest, Mmap) { 235 pid_t child = Fork(); 236 237 if (child == 0) { 238 void* addr = 239 mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 240 MaybeSave(); 241 Exit(addr == MAP_FAILED); 242 } 243 244 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 245 } 246 247 static volatile int alarmed = 0; 248 249 void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; } 250 251 TEST_F(ForkTest, Alarm) { 252 // Setup an alarm handler. 253 struct sigaction sa; 254 sa.sa_sigaction = AlarmHandler; 255 sigfillset(&sa.sa_mask); 256 sa.sa_flags = SA_SIGINFO; 257 EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds()); 258 259 pid_t child = Fork(); 260 261 if (child == 0) { 262 alarm(1); 263 sleep(3); 264 if (!alarmed) { 265 Exit(1); 266 } 267 Exit(0); 268 } 269 270 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 271 EXPECT_EQ(0, alarmed); 272 } 273 274 // Child cannot affect parent private memory. Regression test for b/24137240. 275 TEST_F(ForkTest, PrivateMemory) { 276 std::atomic<uint32_t> local(0); 277 278 pid_t child1 = Fork(); 279 if (child1 == 0) { 280 local++; 281 282 pid_t child2 = Fork(); 283 if (child2 == 0) { 284 local++; 285 286 TEST_CHECK(local.load() == 2); 287 288 Exit(0); 289 } 290 291 TEST_PCHECK(Wait(child2) == 0); 292 TEST_CHECK(local.load() == 1); 293 Exit(0); 294 } 295 296 EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); 297 EXPECT_EQ(0, local.load()); 298 } 299 300 // Kernel-accessed buffers should remain coherent across COW. 301 // 302 // The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates 303 // differently. Regression test for b/33811887. 304 TEST_F(ForkTest, COWSegment) { 305 constexpr int kBufSize = 1024; 306 char* read_buf = private_; 307 char* touch = private_ + kPageSize / 2; 308 309 std::string contents(kBufSize, 'a'); 310 311 ScopedThread t([&] { 312 // Wait to be sure the parent is blocked in read. 313 absl::SleepFor(absl::Seconds(3)); 314 315 // Fork to mark private pages for COW. 316 // 317 // Use fork directly rather than the Fork wrapper to skip the multi-threaded 318 // check, and limit the child to async-signal-safe functions: 319 // 320 // "After a fork() in a multithreaded program, the child can safely call 321 // only async-signal-safe functions (see signal(7)) until such time as it 322 // calls execve(2)." 323 // 324 // Skip ASSERT in the child, as it isn't async-signal-safe. 325 pid_t child = fork(); 326 if (child == 0) { 327 // Wait to be sure parent touched memory. 328 sleep(3); 329 Exit(0); 330 } 331 332 // Check success only in the parent. 333 ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0))); 334 335 // Trigger COW on private page. 336 *touch = 42; 337 338 // Write to pipe. Parent should still be able to read this. 339 EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize), 340 SyscallSucceedsWithValue(kBufSize)); 341 342 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 343 }); 344 345 EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize), 346 SyscallSucceedsWithValue(kBufSize)); 347 EXPECT_STREQ(contents.c_str(), read_buf); 348 } 349 350 TEST_F(ForkTest, SigAltStack) { 351 std::vector<char> stack_mem(SIGSTKSZ); 352 stack_t stack = {}; 353 stack.ss_size = SIGSTKSZ; 354 stack.ss_sp = stack_mem.data(); 355 ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds()); 356 357 pid_t child = Fork(); 358 359 if (child == 0) { 360 stack_t oss = {}; 361 TEST_PCHECK(sigaltstack(nullptr, &oss) == 0); 362 MaybeSave(); 363 364 TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0); 365 TEST_CHECK(oss.ss_size == SIGSTKSZ); 366 TEST_CHECK(oss.ss_sp == stack.ss_sp); 367 368 Exit(0); 369 } 370 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 371 } 372 373 TEST_F(ForkTest, Affinity) { 374 // Make a non-default cpumask. 375 cpu_set_t parent_mask; 376 EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), 377 SyscallSucceeds()); 378 // Knock out the lowest bit. 379 for (unsigned int n = 0; n < CPU_SETSIZE; n++) { 380 if (CPU_ISSET(n, &parent_mask)) { 381 CPU_CLR(n, &parent_mask); 382 break; 383 } 384 } 385 EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), 386 SyscallSucceeds()); 387 388 pid_t child = Fork(); 389 if (child == 0) { 390 cpu_set_t child_mask; 391 392 int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask); 393 MaybeSave(); 394 if (ret < 0) { 395 Exit(-ret); 396 } 397 398 TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask)); 399 400 Exit(0); 401 } 402 403 EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); 404 } 405 406 TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) { 407 // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a 408 // single clone(2) or unshare(2) call, the user namespace is guaranteed to be 409 // created first, giving the child (clone(2)) or caller (unshare(2)) 410 // privileges over the remaining namespaces created by the call. Thus, it is 411 // possible for an unprivileged caller to specify this combination of flags." 412 // - user_namespaces(7) 413 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); 414 Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE( 415 MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); 416 int child_pid; 417 // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these 418 // namespaces were implemented in Linux before user namespaces. 419 ASSERT_THAT( 420 child_pid = clone( 421 +[](void*) { return 0; }, 422 reinterpret_cast<void*>(child_stack.addr() + kPageSize), 423 CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD, 424 /* arg = */ nullptr), 425 SyscallSucceeds()); 426 427 int status; 428 ASSERT_THAT(waitpid(child_pid, &status, 0), 429 SyscallSucceedsWithValue(child_pid)); 430 EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) 431 << "status = " << status; 432 } 433 434 // Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. 435 TEST(CloneTest, NonCanonicalTLS) { 436 constexpr uintptr_t kNonCanonical = 1ull << 48; 437 438 // We need a valid address for the stack pointer. We'll never actually execute 439 // on this. 440 char stack; 441 442 // The raw system call interface on x86-64 is: 443 // long clone(unsigned long flags, void *stack, 444 // int *parent_tid, int *child_tid, 445 // unsigned long tls); 446 // 447 // While on arm64, the order of the last two arguments is reversed: 448 // long clone(unsigned long flags, void *stack, 449 // int *parent_tid, unsigned long tls, 450 // int *child_tid); 451 #if defined(__x86_64__) 452 EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, 453 nullptr, kNonCanonical), 454 SyscallFailsWithErrno(EPERM)); 455 #elif defined(__aarch64__) 456 EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, 457 kNonCanonical, nullptr), 458 SyscallFailsWithErrno(EPERM)); 459 #endif 460 } 461 462 } // namespace 463 } // namespace testing 464 } // namespace gvisor