github.com/schwarzm/garden-linux@v0.0.0-20150507151835-33bca2147c47/old/linux_backend/src/wsh/wshd.c (about) 1 #define _GNU_SOURCE 2 3 #include <assert.h> 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <sched.h> 7 #include <signal.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <sys/ioctl.h> 12 #include <sys/ipc.h> 13 #include <sys/mount.h> 14 #include <sys/param.h> 15 #include <sys/resource.h> 16 #include <sys/shm.h> 17 #include <sys/signalfd.h> 18 #include <sys/socket.h> 19 #include <sys/stat.h> 20 #include <sys/types.h> 21 #include <sys/wait.h> 22 #include <termios.h> 23 #include <unistd.h> 24 25 #include "barrier.h" 26 #include "msg.h" 27 #include "pty.h" 28 #include "pwd.h" 29 #include "un.h" 30 #include "util.h" 31 32 typedef struct wshd_s wshd_t; 33 34 struct wshd_s { 35 /* Path to directory where server socket is placed */ 36 char run_path[256]; 37 38 /* Path to directory containing hooks */ 39 char lib_path[256]; 40 41 /* Path to directory that will become root in the new mount namespace */ 42 char root_path[256]; 43 44 /* Process title */ 45 char title[32]; 46 47 /* Extra flags to pass to clone operation */ 48 int clone_flags; 49 50 /* File descriptor of listening socket */ 51 int fd; 52 53 barrier_t barrier_parent; 54 barrier_t barrier_child; 55 56 /* Map pids to exit status fds */ 57 struct { 58 pid_t pid; 59 int fd; 60 } *pid_to_fd; 61 size_t pid_to_fd_len; 62 }; 63 64 int wshd__usage(wshd_t *w, int argc, char **argv) { 65 fprintf(stderr, "Usage: %s OPTION...\n", argv[0]); 66 fprintf(stderr, "\n"); 67 68 fprintf(stderr, " --run PATH " 69 "Directory where server socket is placed" 70 "\n"); 71 72 fprintf(stderr, " --lib PATH " 73 "Directory containing hooks" 74 "\n"); 75 76 fprintf(stderr, " --root PATH " 77 "Directory that will become root in the new mount namespace" 78 "\n"); 79 80 fprintf(stderr, " --title NAME " 81 "Process title" 82 "\n"); 83 84 fprintf(stderr, " --userns 1 " 85 "If specified, use user namespacing" 86 "\n"); 87 88 return 0; 89 } 90 91 int wshd__getopt(wshd_t *w, int argc, char **argv) { 92 int i = 1; 93 int j = argc - i; 94 int rv; 95 96 w->clone_flags = 0; 97 while (i < argc) { 98 if (j >= 2) { 99 if (strcmp("--run", argv[i]) == 0) { 100 rv = snprintf(w->run_path, sizeof(w->run_path), "%s", argv[i+1]); 101 if (rv >= sizeof(w->run_path)) { 102 goto toolong; 103 } 104 } else if (strcmp("--lib", argv[i]) == 0) { 105 rv = snprintf(w->lib_path, sizeof(w->lib_path), "%s", argv[i+1]); 106 if (rv >= sizeof(w->lib_path)) { 107 goto toolong; 108 } 109 } else if (strcmp("--root", argv[i]) == 0) { 110 rv = snprintf(w->root_path, sizeof(w->root_path), "%s", argv[i+1]); 111 if (rv >= sizeof(w->root_path)) { 112 goto toolong; 113 } 114 } else if (strcmp("--title", argv[i]) == 0) { 115 rv = snprintf(w->title, sizeof(w->title), "%s", argv[i+1]); 116 if (rv >= sizeof(w->title)) { 117 goto toolong; 118 } 119 } else if (strcmp("--userns", argv[i]) == 0) { 120 if (strcmp("disabled", argv[i+1]) != 0) { 121 w->clone_flags = CLONE_NEWUSER; 122 } 123 } else { 124 goto invalid; 125 } 126 127 i += 2; 128 j -= 2; 129 } else if (j == 1) { 130 if (strcmp("-h", argv[i]) == 0 || 131 strcmp("--help", argv[i]) == 0) 132 { 133 wshd__usage(w, argc, argv); 134 return -1; 135 } else { 136 goto invalid; 137 } 138 } else { 139 assert(NULL); 140 } 141 } 142 143 return 0; 144 145 toolong: 146 fprintf(stderr, "%s: argument too long -- %s\n", argv[0], argv[i]); 147 fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); 148 return -1; 149 150 invalid: 151 fprintf(stderr, "%s: invalid option -- %s\n", argv[0], argv[i]); 152 fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); 153 return -1; 154 } 155 156 void assert_directory(const char *path) { 157 int rv; 158 struct stat st; 159 160 rv = stat(path, &st); 161 if (rv == -1) { 162 fprintf(stderr, "stat(\"%s\"): %s\n", path, strerror(errno)); 163 exit(1); 164 } 165 166 if (!S_ISDIR(st.st_mode)) { 167 fprintf(stderr, "stat(\"%s\"): %s\n", path, "No such directory"); 168 exit(1); 169 } 170 } 171 172 void child_pid_to_fd_add(wshd_t *w, pid_t pid, int fd) { 173 int len = w->pid_to_fd_len; 174 175 /* Store a copy */ 176 fd = dup(fd); 177 if (fd == -1) { 178 perror("dup"); 179 abort(); 180 } 181 182 w->pid_to_fd = realloc(w->pid_to_fd, (len + 1) * sizeof(w->pid_to_fd[0])); 183 assert(w->pid_to_fd != NULL); 184 185 w->pid_to_fd[len].pid = pid; 186 w->pid_to_fd[len].fd = fd; 187 w->pid_to_fd_len++; 188 } 189 190 int child_pid_to_fd_remove(wshd_t *w, pid_t pid) { 191 int i; 192 int len = w->pid_to_fd_len; 193 int fd = -1; 194 195 for (i = 0; i < len; i++) { 196 if (w->pid_to_fd[i].pid == pid) { 197 fd = w->pid_to_fd[i].fd; 198 199 /* Move tail if there is one */ 200 if ((i + 1) < len) { 201 memmove(&w->pid_to_fd[i], &w->pid_to_fd[i+1], (len - i - 1) * sizeof(w->pid_to_fd[0])); 202 } 203 204 w->pid_to_fd = realloc(w->pid_to_fd, (w->pid_to_fd_len - 1) * sizeof(w->pid_to_fd[0])); 205 w->pid_to_fd_len--; 206 207 if (w->pid_to_fd_len) { 208 assert(w->pid_to_fd != NULL); 209 } else { 210 assert(w->pid_to_fd == NULL); 211 } 212 213 break; 214 } 215 } 216 217 return fd; 218 } 219 220 char **env__add(char **envp, const char *key, const char *value) { 221 size_t envplen = 0; 222 char *buf; 223 size_t buflen; 224 int rv; 225 226 if (envp == NULL) { 227 /* Trailing NULL */ 228 envplen = 1; 229 } else { 230 while(envp[envplen++] != NULL); 231 } 232 233 envp = realloc(envp, sizeof(envp[0]) * (envplen + 1)); 234 assert(envp != NULL); 235 236 buflen = strlen(key) + 1 + strlen(value) + 1; 237 buf = malloc(buflen); 238 assert(buf != NULL); 239 240 rv = snprintf(buf, buflen, "%s=%s", key, value); 241 assert(rv == buflen - 1); 242 243 envp[envplen - 1] = buf; 244 envp[envplen] = NULL; 245 246 return envp; 247 } 248 249 const char* env__get(char **envp, const char* key) { 250 if (envp != NULL) { 251 int i = 0; 252 while (envp[i] != NULL) { 253 char* eq = strchr(envp[i], '='); 254 if (eq != NULL) { 255 size_t keyLen = eq - envp[i]; 256 if (strlen(key) == keyLen) { 257 if (memcmp(key, envp[i], keyLen) == 0) { 258 return eq + 1; 259 } 260 } 261 } 262 i++; 263 } 264 } 265 266 return NULL; 267 } 268 269 char **child_setup_environment(struct passwd *pw, char **extra_env_vars) { 270 int rv; 271 char **envp = extra_env_vars; 272 273 rv = chdir(pw->pw_dir); 274 if (rv == -1) { 275 perror("chdir"); 276 return NULL; 277 } 278 279 envp = env__add(envp, "HOME", pw->pw_dir); 280 envp = env__add(envp, "USER", pw->pw_name); 281 282 // Use $PATH if provided, otherwise default depending on uid. 283 const char * envp_path = env__get(envp, "PATH"); 284 if (envp_path != NULL) { 285 setenv("PATH", envp_path, 1); 286 } else if (pw->pw_uid == 0) { 287 const char *sanitizedRootPath = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; 288 envp = env__add(envp, "PATH", sanitizedRootPath); 289 setenv("PATH", sanitizedRootPath, 1); 290 } else { 291 const char *sanitizedUserPath = "/usr/local/bin:/usr/bin:/bin"; 292 envp = env__add(envp, "PATH", sanitizedUserPath); 293 setenv("PATH", sanitizedUserPath, 1); 294 } 295 296 return envp; 297 } 298 299 int child_fork(msg_request_t *req, int in, int out, int err) { 300 int rv; 301 302 rv = fork(); 303 if (rv == -1) { 304 perror("fork"); 305 exit(1); 306 } 307 308 if (rv == 0) { 309 const char *user; 310 struct passwd *pw; 311 char *default_argv[] = { "/bin/sh", NULL }; 312 char *default_envp[] = { NULL }; 313 char **argv = default_argv; 314 char **envp = default_envp; 315 char **extra_env_vars = NULL; 316 317 rv = dup2(in, STDIN_FILENO); 318 assert(rv != -1); 319 320 rv = dup2(out, STDOUT_FILENO); 321 assert(rv != -1); 322 323 rv = dup2(err, STDERR_FILENO); 324 assert(rv != -1); 325 326 rv = setsid(); 327 assert(rv != -1); 328 329 user = req->user.name; 330 if (!strlen(user)) { 331 user = "root"; 332 } 333 334 pw = getpwnam(user); 335 if (pw == NULL) { 336 perror("getpwnam"); 337 goto error; 338 } 339 340 if (strlen(pw->pw_shell)) { 341 default_argv[0] = strdup(pw->pw_shell); 342 } 343 344 /* Set controlling terminal if needed */ 345 if (isatty(in)) { 346 rv = ioctl(STDIN_FILENO, TIOCSCTTY, 1); 347 assert(rv != -1); 348 } 349 350 /* Use argv from request if needed */ 351 if (req->arg.count) { 352 argv = (char **)msg_array_export(&req->arg); 353 assert(argv != NULL); 354 } 355 356 rv = msg_rlimit_export(&req->rlim); 357 if (rv == -1) { 358 perror("msg_rlimit_export"); 359 goto error; 360 } 361 362 rv = msg_user_export(&req->user, pw); 363 if (rv == -1) { 364 perror("msg_user_export"); 365 goto error; 366 } 367 368 if (req->env.count) { 369 extra_env_vars = (char **)msg_array_export(&req->env); 370 assert(extra_env_vars != NULL); 371 } 372 373 envp = child_setup_environment(pw, extra_env_vars); 374 assert(envp != NULL); 375 376 if (strlen(req->dir.path)) { 377 rv = chdir(req->dir.path); 378 if (rv == -1) { 379 perror("chdir"); 380 goto error; 381 } 382 } 383 384 // don't mask signals of child process 385 sigset_t mask; 386 sigemptyset(&mask); 387 sigprocmask(SIG_SETMASK, &mask, NULL); 388 389 execvpe(argv[0], argv, envp); 390 perror("execvpe"); 391 392 error: 393 exit(255); 394 } 395 396 return rv; 397 } 398 399 int child_handle_interactive(int fd, wshd_t *w, msg_request_t *req) { 400 int i, j; 401 int num_descriptors = 3; 402 int p[num_descriptors][2]; 403 int p_[num_descriptors]; 404 int rv; 405 msg_response_t res; 406 407 msg_response_init(&res); 408 409 /* Initialize so that the error handler can do its job */ 410 for (i = 0; i < num_descriptors; i++) { 411 p[i][0] = -1; 412 p[i][1] = -1; 413 p_[i] = -1; 414 } 415 416 for (i = 1; i < num_descriptors; i++) { 417 rv = pipe(p[i]); 418 if (rv == -1) { 419 perror("pipe"); 420 abort(); 421 } 422 423 fcntl_mix_cloexec(p[i][0]); 424 fcntl_mix_cloexec(p[i][1]); 425 } 426 427 rv = openpty(&p[0][0], &p[0][1], NULL); 428 if (rv < 0) { 429 perror("openpty"); 430 abort(); 431 } 432 433 fcntl_mix_cloexec(p[0][0]); 434 fcntl_mix_cloexec(p[0][1]); 435 436 /* Descriptors to send to client */ 437 p_[0] = p[0][0]; 438 p_[1] = p[1][0]; 439 p_[2] = p[2][0]; 440 441 rv = un_send_fds(fd, (char *)&res, sizeof(res), p_, num_descriptors); 442 if (rv == -1) { 443 goto err; 444 } 445 446 rv = child_fork(req, p[0][1], p[0][1], p[0][1]); 447 assert(rv > 0); 448 449 write(p[2][1], &rv, sizeof(rv)); 450 451 child_pid_to_fd_add(w, rv, p[1][1]); 452 453 err: 454 for (i = 0; i < 3; i++) { 455 for (j = 0; j < 2; j++) { 456 if (p[i][j] > -1) { 457 close(p[i][j]); 458 p[i][j] = -1; 459 } 460 } 461 } 462 463 if (fd > -1) { 464 close(fd); 465 fd = -1; 466 } 467 468 return 0; 469 } 470 471 int child_handle_noninteractive(int fd, wshd_t *w, msg_request_t *req) { 472 int i, j; 473 int num_descriptors = 5; 474 int p[num_descriptors][2]; 475 int p_[num_descriptors]; 476 int rv; 477 msg_response_t res; 478 479 msg_response_init(&res); 480 481 /* Initialize so that the error handler can do its job */ 482 for (i = 0; i < num_descriptors; i++) { 483 p[i][0] = -1; 484 p[i][1] = -1; 485 p_[i] = -1; 486 } 487 488 for (i = 0; i < num_descriptors; i++) { 489 rv = pipe(p[i]); 490 if (rv == -1) { 491 perror("pipe"); 492 abort(); 493 } 494 495 fcntl_mix_cloexec(p[i][0]); 496 fcntl_mix_cloexec(p[i][1]); 497 } 498 499 /* Descriptors to send to client */ 500 p_[0] = p[0][1]; 501 p_[1] = p[1][0]; 502 p_[2] = p[2][0]; 503 p_[3] = p[3][0]; 504 p_[4] = p[4][0]; 505 506 rv = un_send_fds(fd, (char *)&res, sizeof(res), p_, num_descriptors); 507 if (rv == -1) { 508 goto err; 509 } 510 511 rv = child_fork(req, p[0][0], p[1][1], p[2][1]); 512 assert(rv > 0); 513 514 write(p[4][1], &rv, sizeof(rv)); 515 516 child_pid_to_fd_add(w, rv, p[3][1]); 517 518 err: 519 for (i = 0; i < 5; i++) { 520 for (j = 0; j < 2; j++) { 521 if (p[i][j] > -1) { 522 close(p[i][j]); 523 p[i][j] = -1; 524 } 525 } 526 } 527 528 if (fd > -1) { 529 close(fd); 530 fd = -1; 531 } 532 533 return 0; 534 } 535 536 int child_accept(wshd_t *w) { 537 int rv, fd; 538 msg_request_t req; 539 540 rv = accept(w->fd, NULL, NULL); 541 if (rv == -1) { 542 perror("accept"); 543 abort(); 544 } 545 546 fd = rv; 547 548 fcntl_mix_cloexec(fd); 549 550 rv = un_recv_fds(fd, (char *)&req, sizeof(req), NULL, 0); 551 if (rv < 0) { 552 perror("recvmsg"); 553 exit(255); 554 } 555 556 if (rv == 0) { 557 close(fd); 558 return 0; 559 } 560 561 assert(rv == sizeof(req)); 562 563 if (req.tty) { 564 return child_handle_interactive(fd, w, &req); 565 } else { 566 return child_handle_noninteractive(fd, w, &req); 567 } 568 } 569 570 void child_handle_sigchld(wshd_t *w) { 571 pid_t pid; 572 int status, exitstatus; 573 int fd; 574 575 while (1) { 576 do { 577 pid = waitpid(-1, &status, WNOHANG); 578 } while (pid == -1 && errno == EINTR); 579 580 /* Break when there are no more children */ 581 if (pid <= 0) { 582 break; 583 } 584 585 /* Processes can be reparented, so a pid may not map to an fd */ 586 fd = child_pid_to_fd_remove(w, pid); 587 if (fd == -1) { 588 continue; 589 } 590 591 if (WIFEXITED(status)) { 592 exitstatus = WEXITSTATUS(status); 593 594 /* Send exit status to client */ 595 write(fd, &exitstatus, sizeof(exitstatus)); 596 } else { 597 assert(WIFSIGNALED(status)); 598 599 /* No exit status */ 600 } 601 602 close(fd); 603 } 604 } 605 606 int child_signalfd(void) { 607 sigset_t mask; 608 int rv; 609 int fd; 610 611 sigemptyset(&mask); 612 sigaddset(&mask, SIGCHLD); 613 614 rv = sigprocmask(SIG_BLOCK, &mask, NULL); 615 if (rv == -1) { 616 perror("sigprocmask"); 617 abort(); 618 } 619 620 fd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC); 621 if (fd == -1) { 622 perror("signalfd"); 623 abort(); 624 } 625 626 return fd; 627 } 628 629 int child_loop(wshd_t *w) { 630 int sfd; 631 int rv; 632 633 close(STDIN_FILENO); 634 close(STDOUT_FILENO); 635 close(STDERR_FILENO); 636 637 sfd = child_signalfd(); 638 639 for (;;) { 640 fd_set fds; 641 642 FD_ZERO(&fds); 643 FD_SET(w->fd, &fds); 644 FD_SET(sfd, &fds); 645 646 do { 647 rv = select(FD_SETSIZE, &fds, NULL, NULL, NULL); 648 } while (rv == -1 && errno == EINTR); 649 650 if (rv == -1) { 651 perror("select"); 652 abort(); 653 } 654 655 if (FD_ISSET(w->fd, &fds)) { 656 child_accept(w); 657 } 658 659 if (FD_ISSET(sfd, &fds)) { 660 struct signalfd_siginfo fdsi; 661 662 rv = read(sfd, &fdsi, sizeof(fdsi)); 663 assert(rv == sizeof(fdsi)); 664 665 /* Ignore siginfo and loop waitpid to catch all children */ 666 child_handle_sigchld(w); 667 } 668 } 669 670 return 1; 671 } 672 673 /* No header defines this */ 674 extern int pivot_root(const char *new_root, const char *put_old); 675 676 void child_save_to_shm(wshd_t *w) { 677 int rv; 678 void *w_; 679 680 rv = shmget(0xdeadbeef, sizeof(*w), IPC_CREAT | IPC_EXCL | 0600); 681 if (rv == -1) { 682 perror("shmget"); 683 abort(); 684 } 685 686 w_ = shmat(rv, NULL, 0); 687 if (w_ == (void *)-1) { 688 perror("shmat"); 689 abort(); 690 } 691 692 memcpy(w_, w, sizeof(*w)); 693 } 694 695 wshd_t *child_load_from_shm(void) { 696 int rv; 697 int shmid; 698 wshd_t *w; 699 void *w_; 700 701 shmid = shmget(0xdeadbeef, sizeof(*w), 0600); 702 if (shmid == -1) { 703 perror("shmget"); 704 abort(); 705 } 706 707 w_ = shmat(shmid, NULL, 0); 708 if (w_ == (void *)-1) { 709 perror("shmat"); 710 abort(); 711 } 712 713 w = malloc(sizeof(*w)); 714 if (w == NULL) { 715 perror("malloc"); 716 abort(); 717 } 718 719 memcpy(w, w_, sizeof(*w)); 720 721 rv = shmdt(w_); 722 if (rv == -1) { 723 perror("shmdt"); 724 abort(); 725 } 726 727 rv = shmctl(shmid, IPC_RMID, NULL); 728 if (rv == -1) { 729 perror("shmctl"); 730 abort(); 731 } 732 733 return w; 734 } 735 736 int child_run(void *data) { 737 wshd_t *w = (wshd_t *)data; 738 int rv; 739 char pivoted_lib_path[PATH_MAX]; 740 size_t pivoted_lib_path_len; 741 742 /* Wait for parent */ 743 rv = barrier_wait(&w->barrier_parent); 744 assert(rv == 0); 745 746 /* Prepare lib path for pivot */ 747 strcpy(pivoted_lib_path, "/tmp/garden-host"); 748 pivoted_lib_path_len = strlen(pivoted_lib_path); 749 realpath(w->lib_path, pivoted_lib_path + pivoted_lib_path_len); 750 751 rv = mount(w->root_path, w->root_path, NULL, MS_BIND|MS_REC, NULL); 752 if(rv == -1) { 753 perror("mount"); 754 abort(); 755 } 756 757 rv = chdir(w->root_path); 758 if (rv == -1) { 759 perror("chdir"); 760 abort(); 761 } 762 763 /* Ensure /tmp is world-writable as part of container contract */ 764 rv = chmod("tmp", 01777); 765 if (rv == -1) { 766 perror("chmod"); 767 abort(); 768 } 769 770 rv = mkdir("tmp/garden-host", 0700); 771 if (rv == -1 && errno != EEXIST) { 772 perror("mkdir"); 773 abort(); 774 } 775 776 rv = pivot_root(".", "tmp/garden-host"); 777 if (rv == -1) { 778 perror("pivot_root"); 779 abort(); 780 } 781 782 rv = chdir("/"); 783 if (rv == -1) { 784 perror("chdir"); 785 abort(); 786 } 787 788 rv = symlink("/dev/pts/ptmx", "/dev/ptmx"); 789 if (rv == -1 || errno == EEXIST) { 790 rv = unlink("/dev/ptmx"); 791 if (rv == -1) { 792 perror("unlink"); 793 abort(); 794 } 795 796 rv = symlink("/dev/pts/ptmx", "/dev/ptmx"); 797 } 798 799 rv = setuid(0); 800 if (rv == -1) { 801 perror("setuid"); 802 abort(); 803 } 804 805 rv = setgid(0); 806 if (rv == -1) { 807 perror("setgid"); 808 abort(); 809 } 810 811 rv = hook(pivoted_lib_path, "child-after-pivot"); 812 if(rv != 0) { 813 perror("hook-child-after-pivot"); 814 abort(); 815 } 816 817 child_save_to_shm(w); 818 819 execl("/sbin/wshd", "/sbin/wshd", "--continue", NULL); 820 perror("exec"); 821 abort(); 822 } 823 824 int child_continue(int argc, char **argv) { 825 wshd_t *w; 826 int rv; 827 828 w = child_load_from_shm(); 829 830 /* Process MUST not leak file descriptors to children */ 831 barrier_mix_cloexec(&w->barrier_child); 832 fcntl_mix_cloexec(w->fd); 833 834 if (strlen(w->title) > 0) { 835 setproctitle(argv, w->title); 836 } 837 838 /* Clean up temporary pivot_root dir */ 839 rv = umount2("/tmp/garden-host", MNT_DETACH); 840 if (rv == -1) { 841 perror("unmount2"); 842 exit(1); 843 } 844 845 /* Detach this process from its original group */ 846 rv = setsid(); 847 assert(rv > 0 && rv == getpid()); 848 849 /* Signal parent */ 850 rv = barrier_signal(&w->barrier_child); 851 assert(rv == 0); 852 853 return child_loop(w); 854 } 855 856 pid_t child_start(wshd_t *w) { 857 long pagesize; 858 void *stack; 859 int flags = 0; 860 pid_t pid; 861 862 pagesize = sysconf(_SC_PAGESIZE); 863 stack = alloca(pagesize); 864 assert(stack != NULL); 865 866 /* Point to top of stack (it grows down) */ 867 stack = stack + pagesize; 868 869 /* Setup namespaces */ 870 flags |= CLONE_NEWIPC; 871 flags |= CLONE_NEWNET; 872 flags |= CLONE_NEWNS; 873 flags |= CLONE_NEWPID; 874 flags |= CLONE_NEWUTS; 875 flags |= w->clone_flags; 876 877 pid = clone(child_run, stack, flags, w); 878 if (pid == -1) { 879 perror("clone"); 880 abort(); 881 } 882 883 return pid; 884 } 885 886 void parent_setenv_pid(wshd_t *w, int pid) { 887 char buf[16]; 888 int rv; 889 890 rv = snprintf(buf, sizeof(buf), "%d", pid); 891 assert(rv < sizeof(buf)); 892 893 rv = setenv("PID", buf, 1); 894 assert(rv == 0); 895 } 896 897 /* Returns the maximum allowed number of open files. */ 898 long int max_nr_open() { 899 char file_data[32]; 900 size_t bytes_read; 901 FILE *f; 902 long int nr; 903 904 if ((f = fopen("/proc/sys/fs/nr_open", "r")) == NULL) { 905 perror("Failed to open /proc/sys/fs/nr_open"); 906 abort(); 907 } 908 909 bytes_read = fread(file_data, 1, sizeof(file_data), f); 910 if (ferror(f) || bytes_read == 0) { 911 perror("Failed to read /proc/sys/fs/nr_open"); 912 abort(); 913 } 914 915 if (fclose(f)) { 916 perror("Failed to close /proc/sys/fs/nr_open"); 917 abort(); 918 } 919 920 errno = 0; 921 nr = strtol(file_data, NULL, 10); 922 if (errno) { 923 perror("Contents of /proc/sys/fs/nr_open could not be converted to a long int"); 924 abort(); 925 } 926 return nr; 927 } 928 929 /* Sets a hard resource limit to specified value. */ 930 void set_hard_rlimit(char * resource_name, int resource, rlim_t hard_limit) { 931 char err_text[1024]; 932 struct rlimit lim = {0, 0}; 933 if (getrlimit(resource, &lim)) { 934 strcpy(err_text, "getrlimit failed to return "); 935 strcat(err_text, resource_name); 936 perror(err_text); 937 abort(); 938 } 939 940 lim.rlim_max = hard_limit; 941 if (setrlimit(resource, &lim)) { 942 strcpy(err_text, "setrlimit failed to set "); 943 strcat(err_text, resource_name); 944 perror(err_text); 945 abort(); 946 } 947 } 948 949 /* Sets hard resource limits to their maximum permitted values. */ 950 void set_hard_rlimits() { 951 set_hard_rlimit("RLIMIT_AS", RLIMIT_AS, RLIM_INFINITY); 952 set_hard_rlimit("RLIMIT_CORE", RLIMIT_CORE, RLIM_INFINITY); 953 set_hard_rlimit("RLIMIT_CPU", RLIMIT_CPU, RLIM_INFINITY); 954 set_hard_rlimit("RLIMIT_DATA", RLIMIT_DATA, RLIM_INFINITY); 955 set_hard_rlimit("RLIMIT_FSIZE", RLIMIT_FSIZE, RLIM_INFINITY); 956 set_hard_rlimit("RLIMIT_LOCKS", RLIMIT_LOCKS, RLIM_INFINITY); 957 set_hard_rlimit("RLIMIT_MEMLOCK", RLIMIT_MEMLOCK, RLIM_INFINITY); 958 set_hard_rlimit("RLIMIT_MSGQUEUE", RLIMIT_MSGQUEUE, RLIM_INFINITY); 959 set_hard_rlimit("RLIMIT_NICE", RLIMIT_NICE, RLIM_INFINITY); 960 set_hard_rlimit("RLIMIT_NOFILE", RLIMIT_NOFILE, max_nr_open()); 961 set_hard_rlimit("RLIMIT_NPROC", RLIMIT_NPROC, RLIM_INFINITY); 962 set_hard_rlimit("RLIMIT_RSS", RLIMIT_RSS, RLIM_INFINITY); 963 set_hard_rlimit("RLIMIT_RTPRIO", RLIMIT_RTPRIO, RLIM_INFINITY); 964 set_hard_rlimit("RLIMIT_SIGPENDING", RLIMIT_SIGPENDING, RLIM_INFINITY); 965 set_hard_rlimit("RLIMIT_STACK", RLIMIT_STACK, RLIM_INFINITY); 966 } 967 968 int parent_run(wshd_t *w) { 969 char path[MAXPATHLEN]; 970 int rv; 971 pid_t pid; 972 973 memset(path, 0, sizeof(path)); 974 975 strcpy(path + strlen(path), w->run_path); 976 strcpy(path + strlen(path), "/"); 977 strcpy(path + strlen(path), "wshd.sock"); 978 979 w->fd = un_listen(path); 980 981 rv = barrier_open(&w->barrier_parent); 982 assert(rv == 0); 983 984 rv = barrier_open(&w->barrier_child); 985 assert(rv == 0); 986 987 /* Unshare mount namespace, so the before clone hook is free to mount 988 * whatever it needs without polluting the global mount namespace. */ 989 rv = unshare(CLONE_NEWNS); 990 assert(rv == 0); 991 992 rv = hook(w->lib_path, "parent-before-clone"); 993 assert(rv == 0); 994 995 /* Set hard resource limits to their maximum values so that soft and 996 hard resource limits can be set to arbitrary values even in an 997 unprivileged container. */ 998 set_hard_rlimits(); 999 1000 pid = child_start(w); 1001 assert(pid > 0); 1002 1003 parent_setenv_pid(w, pid); 1004 1005 rv = hook(w->lib_path, "parent-after-clone"); 1006 assert(rv == 0); 1007 1008 rv = barrier_signal(&w->barrier_parent); 1009 if (rv == -1) { 1010 fprintf(stderr, "Error waking up child process\n"); 1011 exit(1); 1012 } 1013 1014 rv = barrier_wait(&w->barrier_child); 1015 if (rv == -1) { 1016 fprintf(stderr, "Error waiting for acknowledgement from child process\n"); 1017 exit(1); 1018 } 1019 1020 return 0; 1021 } 1022 1023 int main(int argc, char **argv) { 1024 wshd_t *w; 1025 int rv; 1026 1027 /* Continue child execution in the context of the container */ 1028 if (argc > 1 && strcmp(argv[1], "--continue") == 0) { 1029 return child_continue(argc, argv); 1030 } 1031 1032 w = calloc(1, sizeof(*w)); 1033 assert(w != NULL); 1034 1035 rv = wshd__getopt(w, argc, argv); 1036 if (rv == -1) { 1037 exit(1); 1038 } 1039 1040 if (strlen(w->run_path) == 0) { 1041 strcpy(w->run_path, "run"); 1042 } 1043 1044 if (strlen(w->lib_path) == 0) { 1045 strcpy(w->lib_path, "lib"); 1046 } 1047 1048 if (strlen(w->root_path) == 0) { 1049 strcpy(w->root_path, "root"); 1050 } 1051 1052 assert_directory(w->run_path); 1053 assert_directory(w->lib_path); 1054 assert_directory(w->root_path); 1055 1056 parent_run(w); 1057 1058 return 0; 1059 }