github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/syscall/exec_plan9.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Fork, exec, wait, etc. 6 7 package syscall 8 9 import ( 10 "runtime" 11 "sync" 12 "unsafe" 13 ) 14 15 // Lock synchronizing creation of new file descriptors with fork. 16 // 17 // We want the child in a fork/exec sequence to inherit only the 18 // file descriptors we intend. To do that, we mark all file 19 // descriptors close-on-exec and then, in the child, explicitly 20 // unmark the ones we want the exec'ed program to keep. 21 // Unix doesn't make this easy: there is, in general, no way to 22 // allocate a new file descriptor close-on-exec. Instead you 23 // have to allocate the descriptor and then mark it close-on-exec. 24 // If a fork happens between those two events, the child's exec 25 // will inherit an unwanted file descriptor. 26 // 27 // This lock solves that race: the create new fd/mark close-on-exec 28 // operation is done holding ForkLock for reading, and the fork itself 29 // is done holding ForkLock for writing. At least, that's the idea. 30 // There are some complications. 31 // 32 // Some system calls that create new file descriptors can block 33 // for arbitrarily long times: open on a hung NFS server or named 34 // pipe, accept on a socket, and so on. We can't reasonably grab 35 // the lock across those operations. 36 // 37 // It is worse to inherit some file descriptors than others. 38 // If a non-malicious child accidentally inherits an open ordinary file, 39 // that's not a big deal. On the other hand, if a long-lived child 40 // accidentally inherits the write end of a pipe, then the reader 41 // of that pipe will not see EOF until that child exits, potentially 42 // causing the parent program to hang. This is a common problem 43 // in threaded C programs that use popen. 44 // 45 // Luckily, the file descriptors that are most important not to 46 // inherit are not the ones that can take an arbitrarily long time 47 // to create: pipe returns instantly, and the net package uses 48 // non-blocking I/O to accept on a listening socket. 49 // The rules for which file descriptor-creating operations use the 50 // ForkLock are as follows: 51 // 52 // 1) Pipe. Does not block. Use the ForkLock. 53 // 2) Socket. Does not block. Use the ForkLock. 54 // 3) Accept. If using non-blocking mode, use the ForkLock. 55 // Otherwise, live with the race. 56 // 4) Open. Can block. Use O_CLOEXEC if available (Linux). 57 // Otherwise, live with the race. 58 // 5) Dup. Does not block. Use the ForkLock. 59 // On Linux, could use fcntl F_DUPFD_CLOEXEC 60 // instead of the ForkLock, but only for dup(fd, -1). 61 62 var ForkLock sync.RWMutex 63 64 // StringSlicePtr is deprecated. Use SlicePtrFromStrings instead. 65 // If any string contains a NUL byte this function panics instead 66 // of returning an error. 67 func StringSlicePtr(ss []string) []*byte { 68 bb := make([]*byte, len(ss)+1) 69 for i := 0; i < len(ss); i++ { 70 bb[i] = StringBytePtr(ss[i]) 71 } 72 bb[len(ss)] = nil 73 return bb 74 } 75 76 // SlicePtrFromStrings converts a slice of strings to a slice of 77 // pointers to NUL-terminated byte slices. If any string contains 78 // a NUL byte, it returns (nil, EINVAL). 79 func SlicePtrFromStrings(ss []string) ([]*byte, error) { 80 var err error 81 bb := make([]*byte, len(ss)+1) 82 for i := 0; i < len(ss); i++ { 83 bb[i], err = BytePtrFromString(ss[i]) 84 if err != nil { 85 return nil, err 86 } 87 } 88 bb[len(ss)] = nil 89 return bb, nil 90 } 91 92 // readdirnames returns the names of files inside the directory represented by dirfd. 93 func readdirnames(dirfd int) (names []string, err error) { 94 names = make([]string, 0, 100) 95 var buf [STATMAX]byte 96 97 for { 98 n, e := Read(dirfd, buf[:]) 99 if e != nil { 100 return nil, e 101 } 102 if n == 0 { 103 break 104 } 105 for i := 0; i < n; { 106 m, _ := gbit16(buf[i:]) 107 m += 2 108 109 if m < STATFIXLEN { 110 return nil, ErrBadStat 111 } 112 113 s, _, ok := gstring(buf[i+41:]) 114 if !ok { 115 return nil, ErrBadStat 116 } 117 names = append(names, s) 118 i += int(m) 119 } 120 } 121 return 122 } 123 124 // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d. 125 // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read. 126 func readdupdevice() (fds []int, err error) { 127 dupdevfd, err := Open("#d", O_RDONLY) 128 if err != nil { 129 return 130 } 131 defer Close(dupdevfd) 132 133 names, err := readdirnames(dupdevfd) 134 if err != nil { 135 return 136 } 137 138 fds = make([]int, 0, len(names)/2) 139 for _, name := range names { 140 if n := len(name); n > 3 && name[n-3:n] == "ctl" { 141 continue 142 } 143 fd := int(atoi([]byte(name))) 144 switch fd { 145 case 0, 1, 2, dupdevfd: 146 continue 147 } 148 fds = append(fds, fd) 149 } 150 return 151 } 152 153 var startupFds []int 154 155 // Plan 9 does not allow clearing the OCEXEC flag 156 // from the underlying channel backing an open file descriptor, 157 // therefore we store a list of already opened file descriptors 158 // inside startupFds and skip them when manually closing descriptors 159 // not meant to be passed to a child exec. 160 func init() { 161 startupFds, _ = readdupdevice() 162 } 163 164 // forkAndExecInChild forks the process, calling dup onto 0..len(fd) 165 // and finally invoking exec(argv0, argvv, envv) in the child. 166 // If a dup or exec fails, it writes the error string to pipe. 167 // (The pipe write end is close-on-exec so if exec succeeds, it will be closed.) 168 // 169 // In the child, this function must not acquire any locks, because 170 // they might have been locked at the time of the fork. This means 171 // no rescheduling, no malloc calls, and no new stack segments. 172 // The calls to RawSyscall are okay because they are assembly 173 // functions that do not grow the stack. 174 func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) { 175 // Declare all variables at top in case any 176 // declarations require heap allocation (e.g., errbuf). 177 var ( 178 r1 uintptr 179 nextfd int 180 i int 181 clearenv int 182 envfd int 183 errbuf [ERRMAX]byte 184 ) 185 186 // Guard against side effects of shuffling fds below. 187 // Make sure that nextfd is beyond any currently open files so 188 // that we can't run the risk of overwriting any of them. 189 fd := make([]int, len(attr.Files)) 190 nextfd = len(attr.Files) 191 for i, ufd := range attr.Files { 192 if nextfd < int(ufd) { 193 nextfd = int(ufd) 194 } 195 fd[i] = int(ufd) 196 } 197 nextfd++ 198 199 if envv != nil { 200 clearenv = RFCENVG 201 } 202 203 // About to call fork. 204 // No more allocation or calls of non-assembly functions. 205 r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0) 206 207 if r1 != 0 { 208 if int32(r1) == -1 { 209 return 0, NewError(errstr()) 210 } 211 // parent; return PID 212 return int(r1), nil 213 } 214 215 // Fork succeeded, now in child. 216 217 // Close fds we don't need. 218 for i = 0; i < len(fdsToClose); i++ { 219 r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0) 220 if int32(r1) == -1 { 221 goto childerror 222 } 223 } 224 225 if envv != nil { 226 // Write new environment variables. 227 for i = 0; i < len(envv); i++ { 228 r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666)) 229 230 if int32(r1) == -1 { 231 goto childerror 232 } 233 234 envfd = int(r1) 235 236 r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue), 237 ^uintptr(0), ^uintptr(0), 0) 238 239 if int32(r1) == -1 || int(r1) != envv[i].nvalue { 240 goto childerror 241 } 242 243 r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0) 244 245 if int32(r1) == -1 { 246 goto childerror 247 } 248 } 249 } 250 251 // Chdir 252 if dir != nil { 253 r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0) 254 if int32(r1) == -1 { 255 goto childerror 256 } 257 } 258 259 // Pass 1: look for fd[i] < i and move those up above len(fd) 260 // so that pass 2 won't stomp on an fd it needs later. 261 if pipe < nextfd { 262 r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0) 263 if int32(r1) == -1 { 264 goto childerror 265 } 266 pipe = nextfd 267 nextfd++ 268 } 269 for i = 0; i < len(fd); i++ { 270 if fd[i] >= 0 && fd[i] < int(i) { 271 r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0) 272 if int32(r1) == -1 { 273 goto childerror 274 } 275 276 fd[i] = nextfd 277 nextfd++ 278 if nextfd == pipe { // don't stomp on pipe 279 nextfd++ 280 } 281 } 282 } 283 284 // Pass 2: dup fd[i] down onto i. 285 for i = 0; i < len(fd); i++ { 286 if fd[i] == -1 { 287 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0) 288 continue 289 } 290 if fd[i] == int(i) { 291 continue 292 } 293 r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0) 294 if int32(r1) == -1 { 295 goto childerror 296 } 297 } 298 299 // Pass 3: close fd[i] if it was moved in the previous pass. 300 for i = 0; i < len(fd); i++ { 301 if fd[i] >= 0 && fd[i] != int(i) { 302 RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0) 303 } 304 } 305 306 // Time to exec. 307 r1, _, _ = RawSyscall(SYS_EXEC, 308 uintptr(unsafe.Pointer(argv0)), 309 uintptr(unsafe.Pointer(&argv[0])), 0) 310 311 childerror: 312 // send error string on pipe 313 RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0) 314 errbuf[len(errbuf)-1] = 0 315 i = 0 316 for i < len(errbuf) && errbuf[i] != 0 { 317 i++ 318 } 319 320 RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i), 321 ^uintptr(0), ^uintptr(0), 0) 322 323 for { 324 RawSyscall(SYS_EXITS, 0, 0, 0) 325 } 326 327 // Calling panic is not actually safe, 328 // but the for loop above won't break 329 // and this shuts up the compiler. 330 panic("unreached") 331 } 332 333 func cexecPipe(p []int) error { 334 e := Pipe(p) 335 if e != nil { 336 return e 337 } 338 339 fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC) 340 if e != nil { 341 Close(p[0]) 342 Close(p[1]) 343 return e 344 } 345 346 Close(fd) 347 return nil 348 } 349 350 type envItem struct { 351 name *byte 352 value *byte 353 nvalue int 354 } 355 356 type ProcAttr struct { 357 Dir string // Current working directory. 358 Env []string // Environment. 359 Files []uintptr // File descriptors. 360 Sys *SysProcAttr 361 } 362 363 type SysProcAttr struct { 364 Rfork int // additional flags to pass to rfork 365 } 366 367 var zeroProcAttr ProcAttr 368 var zeroSysProcAttr SysProcAttr 369 370 func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 371 var ( 372 p [2]int 373 n int 374 errbuf [ERRMAX]byte 375 wmsg Waitmsg 376 ) 377 378 if attr == nil { 379 attr = &zeroProcAttr 380 } 381 sys := attr.Sys 382 if sys == nil { 383 sys = &zeroSysProcAttr 384 } 385 386 p[0] = -1 387 p[1] = -1 388 389 // Convert args to C form. 390 argv0p, err := BytePtrFromString(argv0) 391 if err != nil { 392 return 0, err 393 } 394 argvp, err := SlicePtrFromStrings(argv) 395 if err != nil { 396 return 0, err 397 } 398 399 var dir *byte 400 if attr.Dir != "" { 401 dir, err = BytePtrFromString(attr.Dir) 402 if err != nil { 403 return 0, err 404 } 405 } 406 var envvParsed []envItem 407 if attr.Env != nil { 408 envvParsed = make([]envItem, 0, len(attr.Env)) 409 for _, v := range attr.Env { 410 i := 0 411 for i < len(v) && v[i] != '=' { 412 i++ 413 } 414 415 envname, err := BytePtrFromString("/env/" + v[:i]) 416 if err != nil { 417 return 0, err 418 } 419 envvalue := make([]byte, len(v)-i) 420 copy(envvalue, v[i+1:]) 421 envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i}) 422 } 423 } 424 425 // Acquire the fork lock to prevent other threads from creating new fds before we fork. 426 ForkLock.Lock() 427 428 // get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child. 429 // no new fds can be created while we hold the ForkLock for writing. 430 openFds, e := readdupdevice() 431 if e != nil { 432 ForkLock.Unlock() 433 return 0, e 434 } 435 436 fdsToClose := make([]int, 0, len(openFds)) 437 for _, fd := range openFds { 438 doClose := true 439 440 // exclude files opened at startup. 441 for _, sfd := range startupFds { 442 if fd == sfd { 443 doClose = false 444 break 445 } 446 } 447 448 // exclude files explicitly requested by the caller. 449 for _, rfd := range attr.Files { 450 if fd == int(rfd) { 451 doClose = false 452 break 453 } 454 } 455 456 if doClose { 457 fdsToClose = append(fdsToClose, fd) 458 } 459 } 460 461 // Allocate child status pipe close on exec. 462 e = cexecPipe(p[:]) 463 464 if e != nil { 465 return 0, e 466 } 467 fdsToClose = append(fdsToClose, p[0]) 468 469 // Kick off child. 470 pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork) 471 472 if err != nil { 473 if p[0] >= 0 { 474 Close(p[0]) 475 Close(p[1]) 476 } 477 ForkLock.Unlock() 478 return 0, err 479 } 480 ForkLock.Unlock() 481 482 // Read child error status from pipe. 483 Close(p[1]) 484 n, err = Read(p[0], errbuf[:]) 485 Close(p[0]) 486 487 if err != nil || n != 0 { 488 if n != 0 { 489 err = NewError(string(errbuf[:n])) 490 } 491 492 // Child failed; wait for it to exit, to make sure 493 // the zombies don't accumulate. 494 for wmsg.Pid != pid { 495 Await(&wmsg) 496 } 497 return 0, err 498 } 499 500 // Read got EOF, so pipe closed on exec, so exec succeeded. 501 return pid, nil 502 } 503 504 type waitErr struct { 505 Waitmsg 506 err error 507 } 508 509 var procs struct { 510 sync.Mutex 511 waits map[int]chan *waitErr 512 } 513 514 // startProcess starts a new goroutine, tied to the OS 515 // thread, which runs the process and subsequently waits 516 // for it to finish, communicating the process stats back 517 // to any goroutines that may have been waiting on it. 518 // 519 // Such a dedicated goroutine is needed because on 520 // Plan 9, only the parent thread can wait for a child, 521 // whereas goroutines tend to jump OS threads (e.g., 522 // between starting a process and running Wait(), the 523 // goroutine may have been rescheduled). 524 func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 525 type forkRet struct { 526 pid int 527 err error 528 } 529 530 forkc := make(chan forkRet, 1) 531 go func() { 532 runtime.LockOSThread() 533 var ret forkRet 534 535 ret.pid, ret.err = forkExec(argv0, argv, attr) 536 // If fork fails there is nothing to wait for. 537 if ret.err != nil || ret.pid == 0 { 538 forkc <- ret 539 return 540 } 541 542 waitc := make(chan *waitErr, 1) 543 544 // Mark that the process is running. 545 procs.Lock() 546 if procs.waits == nil { 547 procs.waits = make(map[int]chan *waitErr) 548 } 549 procs.waits[ret.pid] = waitc 550 procs.Unlock() 551 552 forkc <- ret 553 554 var w waitErr 555 for w.err == nil && w.Pid != ret.pid { 556 w.err = Await(&w.Waitmsg) 557 } 558 waitc <- &w 559 close(waitc) 560 }() 561 ret := <-forkc 562 return ret.pid, ret.err 563 } 564 565 // Combination of fork and exec, careful to be thread safe. 566 func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 567 return startProcess(argv0, argv, attr) 568 } 569 570 // StartProcess wraps ForkExec for package os. 571 func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) { 572 pid, err = startProcess(argv0, argv, attr) 573 return pid, 0, err 574 } 575 576 // Ordinary exec. 577 func Exec(argv0 string, argv []string, envv []string) (err error) { 578 if envv != nil { 579 r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0) 580 if int32(r1) == -1 { 581 return NewError(errstr()) 582 } 583 584 for _, v := range envv { 585 i := 0 586 for i < len(v) && v[i] != '=' { 587 i++ 588 } 589 590 fd, e := Create("/env/"+v[:i], O_WRONLY, 0666) 591 if e != nil { 592 return e 593 } 594 595 _, e = Write(fd, []byte(v[i+1:])) 596 if e != nil { 597 Close(fd) 598 return e 599 } 600 Close(fd) 601 } 602 } 603 604 argv0p, err := BytePtrFromString(argv0) 605 if err != nil { 606 return err 607 } 608 argvp, err := SlicePtrFromStrings(argv) 609 if err != nil { 610 return err 611 } 612 _, _, e1 := Syscall(SYS_EXEC, 613 uintptr(unsafe.Pointer(argv0p)), 614 uintptr(unsafe.Pointer(&argvp[0])), 615 0) 616 617 return e1 618 } 619 620 // WaitProcess waits until the pid of a 621 // running process is found in the queue of 622 // wait messages. It is used in conjunction 623 // with ForkExec/StartProcess to wait for a 624 // running process to exit. 625 func WaitProcess(pid int, w *Waitmsg) (err error) { 626 procs.Lock() 627 ch := procs.waits[pid] 628 procs.Unlock() 629 630 var wmsg *waitErr 631 if ch != nil { 632 wmsg = <-ch 633 procs.Lock() 634 if procs.waits[pid] == ch { 635 delete(procs.waits, pid) 636 } 637 procs.Unlock() 638 } 639 if wmsg == nil { 640 // ch was missing or ch is closed 641 return NewError("process not found") 642 } 643 if wmsg.err != nil { 644 return wmsg.err 645 } 646 if w != nil { 647 *w = wmsg.Waitmsg 648 } 649 return nil 650 }