// Source: github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/syscall/exec_plan9.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Fork, exec, wait, etc.

package syscall

import (
	"runtime"
	"sync"
	"unsafe"
)

// Lock synchronizing creation of new file descriptors with fork.
//
// We want the child in a fork/exec sequence to inherit only the
// file descriptors we intend. To do that, we mark all file
// descriptors close-on-exec and then, in the child, explicitly
// unmark the ones we want the exec'ed program to keep.
// Unix doesn't make this easy: there is, in general, no way to
// allocate a new file descriptor close-on-exec. Instead you
// have to allocate the descriptor and then mark it close-on-exec.
// If a fork happens between those two events, the child's exec
// will inherit an unwanted file descriptor.
//
// This lock solves that race: the create new fd/mark close-on-exec
// operation is done holding ForkLock for reading, and the fork itself
// is done holding ForkLock for writing. At least, that's the idea.
// There are some complications.
//
// Some system calls that create new file descriptors can block
// for arbitrarily long times: open on a hung NFS server or named
// pipe, accept on a socket, and so on. We can't reasonably grab
// the lock across those operations.
//
// It is worse to inherit some file descriptors than others.
// If a non-malicious child accidentally inherits an open ordinary file,
// that's not a big deal. On the other hand, if a long-lived child
// accidentally inherits the write end of a pipe, then the reader
// of that pipe will not see EOF until that child exits, potentially
// causing the parent program to hang. This is a common problem
// in threaded C programs that use popen.
44 // 45 // Luckily, the file descriptors that are most important not to 46 // inherit are not the ones that can take an arbitrarily long time 47 // to create: pipe returns instantly, and the net package uses 48 // non-blocking I/O to accept on a listening socket. 49 // The rules for which file descriptor-creating operations use the 50 // ForkLock are as follows: 51 // 52 // 1) Pipe. Does not block. Use the ForkLock. 53 // 2) Socket. Does not block. Use the ForkLock. 54 // 3) Accept. If using non-blocking mode, use the ForkLock. 55 // Otherwise, live with the race. 56 // 4) Open. Can block. Use O_CLOEXEC if available (Linux). 57 // Otherwise, live with the race. 58 // 5) Dup. Does not block. Use the ForkLock. 59 // On Linux, could use fcntl F_DUPFD_CLOEXEC 60 // instead of the ForkLock, but only for dup(fd, -1). 61 62 var ForkLock sync.RWMutex 63 64 // StringSlicePtr converts a slice of strings to a slice of pointers 65 // to NUL-terminated byte arrays. If any string contains a NUL byte 66 // this function panics instead of returning an error. 67 // 68 // Deprecated: Use SlicePtrFromStrings instead. 69 func StringSlicePtr(ss []string) []*byte { 70 bb := make([]*byte, len(ss)+1) 71 for i := 0; i < len(ss); i++ { 72 bb[i] = StringBytePtr(ss[i]) 73 } 74 bb[len(ss)] = nil 75 return bb 76 } 77 78 // SlicePtrFromStrings converts a slice of strings to a slice of 79 // pointers to NUL-terminated byte arrays. If any string contains 80 // a NUL byte, it returns (nil, EINVAL). 81 func SlicePtrFromStrings(ss []string) ([]*byte, error) { 82 var err error 83 bb := make([]*byte, len(ss)+1) 84 for i := 0; i < len(ss); i++ { 85 bb[i], err = BytePtrFromString(ss[i]) 86 if err != nil { 87 return nil, err 88 } 89 } 90 bb[len(ss)] = nil 91 return bb, nil 92 } 93 94 // readdirnames returns the names of files inside the directory represented by dirfd. 
95 func readdirnames(dirfd int) (names []string, err error) { 96 names = make([]string, 0, 100) 97 var buf [STATMAX]byte 98 99 for { 100 n, e := Read(dirfd, buf[:]) 101 if e != nil { 102 return nil, e 103 } 104 if n == 0 { 105 break 106 } 107 for i := 0; i < n; { 108 m, _ := gbit16(buf[i:]) 109 m += 2 110 111 if m < STATFIXLEN { 112 return nil, ErrBadStat 113 } 114 115 s, _, ok := gstring(buf[i+41:]) 116 if !ok { 117 return nil, ErrBadStat 118 } 119 names = append(names, s) 120 i += int(m) 121 } 122 } 123 return 124 } 125 126 // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d. 127 // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read. 128 func readdupdevice() (fds []int, err error) { 129 dupdevfd, err := Open("#d", O_RDONLY) 130 if err != nil { 131 return 132 } 133 defer Close(dupdevfd) 134 135 names, err := readdirnames(dupdevfd) 136 if err != nil { 137 return 138 } 139 140 fds = make([]int, 0, len(names)/2) 141 for _, name := range names { 142 if n := len(name); n > 3 && name[n-3:n] == "ctl" { 143 continue 144 } 145 fd := int(atoi([]byte(name))) 146 switch fd { 147 case 0, 1, 2, dupdevfd: 148 continue 149 } 150 fds = append(fds, fd) 151 } 152 return 153 } 154 155 var startupFds []int 156 157 // Plan 9 does not allow clearing the OCEXEC flag 158 // from the underlying channel backing an open file descriptor, 159 // therefore we store a list of already opened file descriptors 160 // inside startupFds and skip them when manually closing descriptors 161 // not meant to be passed to a child exec. 162 func init() { 163 startupFds, _ = readdupdevice() 164 } 165 166 // forkAndExecInChild forks the process, calling dup onto 0..len(fd) 167 // and finally invoking exec(argv0, argvv, envv) in the child. 168 // If a dup or exec fails, it writes the error string to pipe. 
169 // (The pipe write end is close-on-exec so if exec succeeds, it will be closed.) 170 // 171 // In the child, this function must not acquire any locks, because 172 // they might have been locked at the time of the fork. This means 173 // no rescheduling, no malloc calls, and no new stack segments. 174 // The calls to RawSyscall are okay because they are assembly 175 // functions that do not grow the stack. 176 func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) { 177 // Declare all variables at top in case any 178 // declarations require heap allocation (e.g., errbuf). 179 var ( 180 r1 uintptr 181 nextfd int 182 i int 183 clearenv int 184 envfd int 185 errbuf [ERRMAX]byte 186 ) 187 188 // Guard against side effects of shuffling fds below. 189 // Make sure that nextfd is beyond any currently open files so 190 // that we can't run the risk of overwriting any of them. 191 fd := make([]int, len(attr.Files)) 192 nextfd = len(attr.Files) 193 for i, ufd := range attr.Files { 194 if nextfd < int(ufd) { 195 nextfd = int(ufd) 196 } 197 fd[i] = int(ufd) 198 } 199 nextfd++ 200 201 if envv != nil { 202 clearenv = RFCENVG 203 } 204 205 // About to call fork. 206 // No more allocation or calls of non-assembly functions. 207 r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0) 208 209 if r1 != 0 { 210 if int32(r1) == -1 { 211 return 0, NewError(errstr()) 212 } 213 // parent; return PID 214 return int(r1), nil 215 } 216 217 // Fork succeeded, now in child. 218 219 // Close fds we don't need. 220 for i = 0; i < len(fdsToClose); i++ { 221 r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0) 222 if int32(r1) == -1 { 223 goto childerror 224 } 225 } 226 227 if envv != nil { 228 // Write new environment variables. 
229 for i = 0; i < len(envv); i++ { 230 r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666)) 231 232 if int32(r1) == -1 { 233 goto childerror 234 } 235 236 envfd = int(r1) 237 238 r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue), 239 ^uintptr(0), ^uintptr(0), 0) 240 241 if int32(r1) == -1 || int(r1) != envv[i].nvalue { 242 goto childerror 243 } 244 245 r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0) 246 247 if int32(r1) == -1 { 248 goto childerror 249 } 250 } 251 } 252 253 // Chdir 254 if dir != nil { 255 r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0) 256 if int32(r1) == -1 { 257 goto childerror 258 } 259 } 260 261 // Pass 1: look for fd[i] < i and move those up above len(fd) 262 // so that pass 2 won't stomp on an fd it needs later. 263 if pipe < nextfd { 264 r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0) 265 if int32(r1) == -1 { 266 goto childerror 267 } 268 pipe = nextfd 269 nextfd++ 270 } 271 for i = 0; i < len(fd); i++ { 272 if fd[i] >= 0 && fd[i] < int(i) { 273 r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0) 274 if int32(r1) == -1 { 275 goto childerror 276 } 277 278 fd[i] = nextfd 279 nextfd++ 280 if nextfd == pipe { // don't stomp on pipe 281 nextfd++ 282 } 283 } 284 } 285 286 // Pass 2: dup fd[i] down onto i. 287 for i = 0; i < len(fd); i++ { 288 if fd[i] == -1 { 289 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0) 290 continue 291 } 292 if fd[i] == int(i) { 293 continue 294 } 295 r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0) 296 if int32(r1) == -1 { 297 goto childerror 298 } 299 } 300 301 // Pass 3: close fd[i] if it was moved in the previous pass. 302 for i = 0; i < len(fd); i++ { 303 if fd[i] >= 0 && fd[i] != int(i) { 304 RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0) 305 } 306 } 307 308 // Time to exec. 
309 r1, _, _ = RawSyscall(SYS_EXEC, 310 uintptr(unsafe.Pointer(argv0)), 311 uintptr(unsafe.Pointer(&argv[0])), 0) 312 313 childerror: 314 // send error string on pipe 315 RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0) 316 errbuf[len(errbuf)-1] = 0 317 i = 0 318 for i < len(errbuf) && errbuf[i] != 0 { 319 i++ 320 } 321 322 RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i), 323 ^uintptr(0), ^uintptr(0), 0) 324 325 for { 326 RawSyscall(SYS_EXITS, 0, 0, 0) 327 } 328 329 // Calling panic is not actually safe, 330 // but the for loop above won't break 331 // and this shuts up the compiler. 332 panic("unreached") 333 } 334 335 func cexecPipe(p []int) error { 336 e := Pipe(p) 337 if e != nil { 338 return e 339 } 340 341 fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC) 342 if e != nil { 343 Close(p[0]) 344 Close(p[1]) 345 return e 346 } 347 348 Close(fd) 349 return nil 350 } 351 352 type envItem struct { 353 name *byte 354 value *byte 355 nvalue int 356 } 357 358 type ProcAttr struct { 359 Dir string // Current working directory. 360 Env []string // Environment. 361 Files []uintptr // File descriptors. 362 Sys *SysProcAttr 363 } 364 365 type SysProcAttr struct { 366 Rfork int // additional flags to pass to rfork 367 } 368 369 var zeroProcAttr ProcAttr 370 var zeroSysProcAttr SysProcAttr 371 372 func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 373 var ( 374 p [2]int 375 n int 376 errbuf [ERRMAX]byte 377 wmsg Waitmsg 378 ) 379 380 if attr == nil { 381 attr = &zeroProcAttr 382 } 383 sys := attr.Sys 384 if sys == nil { 385 sys = &zeroSysProcAttr 386 } 387 388 p[0] = -1 389 p[1] = -1 390 391 // Convert args to C form. 
392 argv0p, err := BytePtrFromString(argv0) 393 if err != nil { 394 return 0, err 395 } 396 argvp, err := SlicePtrFromStrings(argv) 397 if err != nil { 398 return 0, err 399 } 400 401 destDir := attr.Dir 402 if destDir == "" { 403 wdmu.Lock() 404 destDir = wdStr 405 wdmu.Unlock() 406 } 407 var dir *byte 408 if destDir != "" { 409 dir, err = BytePtrFromString(destDir) 410 if err != nil { 411 return 0, err 412 } 413 } 414 var envvParsed []envItem 415 if attr.Env != nil { 416 envvParsed = make([]envItem, 0, len(attr.Env)) 417 for _, v := range attr.Env { 418 i := 0 419 for i < len(v) && v[i] != '=' { 420 i++ 421 } 422 423 envname, err := BytePtrFromString("/env/" + v[:i]) 424 if err != nil { 425 return 0, err 426 } 427 envvalue := make([]byte, len(v)-i) 428 copy(envvalue, v[i+1:]) 429 envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i}) 430 } 431 } 432 433 // Acquire the fork lock to prevent other threads from creating new fds before we fork. 434 ForkLock.Lock() 435 436 // get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child. 437 // no new fds can be created while we hold the ForkLock for writing. 438 openFds, e := readdupdevice() 439 if e != nil { 440 ForkLock.Unlock() 441 return 0, e 442 } 443 444 fdsToClose := make([]int, 0, len(openFds)) 445 for _, fd := range openFds { 446 doClose := true 447 448 // exclude files opened at startup. 449 for _, sfd := range startupFds { 450 if fd == sfd { 451 doClose = false 452 break 453 } 454 } 455 456 // exclude files explicitly requested by the caller. 457 for _, rfd := range attr.Files { 458 if fd == int(rfd) { 459 doClose = false 460 break 461 } 462 } 463 464 if doClose { 465 fdsToClose = append(fdsToClose, fd) 466 } 467 } 468 469 // Allocate child status pipe close on exec. 470 e = cexecPipe(p[:]) 471 472 if e != nil { 473 return 0, e 474 } 475 fdsToClose = append(fdsToClose, p[0]) 476 477 // Kick off child. 
478 pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork) 479 480 if err != nil { 481 if p[0] >= 0 { 482 Close(p[0]) 483 Close(p[1]) 484 } 485 ForkLock.Unlock() 486 return 0, err 487 } 488 ForkLock.Unlock() 489 490 // Read child error status from pipe. 491 Close(p[1]) 492 n, err = Read(p[0], errbuf[:]) 493 Close(p[0]) 494 495 if err != nil || n != 0 { 496 if n != 0 { 497 err = NewError(string(errbuf[:n])) 498 } 499 500 // Child failed; wait for it to exit, to make sure 501 // the zombies don't accumulate. 502 for wmsg.Pid != pid { 503 Await(&wmsg) 504 } 505 return 0, err 506 } 507 508 // Read got EOF, so pipe closed on exec, so exec succeeded. 509 return pid, nil 510 } 511 512 type waitErr struct { 513 Waitmsg 514 err error 515 } 516 517 var procs struct { 518 sync.Mutex 519 waits map[int]chan *waitErr 520 } 521 522 // startProcess starts a new goroutine, tied to the OS 523 // thread, which runs the process and subsequently waits 524 // for it to finish, communicating the process stats back 525 // to any goroutines that may have been waiting on it. 526 // 527 // Such a dedicated goroutine is needed because on 528 // Plan 9, only the parent thread can wait for a child, 529 // whereas goroutines tend to jump OS threads (e.g., 530 // between starting a process and running Wait(), the 531 // goroutine may have been rescheduled). 532 func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 533 type forkRet struct { 534 pid int 535 err error 536 } 537 538 forkc := make(chan forkRet, 1) 539 go func() { 540 runtime.LockOSThread() 541 var ret forkRet 542 543 ret.pid, ret.err = forkExec(argv0, argv, attr) 544 // If fork fails there is nothing to wait for. 545 if ret.err != nil || ret.pid == 0 { 546 forkc <- ret 547 return 548 } 549 550 waitc := make(chan *waitErr, 1) 551 552 // Mark that the process is running. 
553 procs.Lock() 554 if procs.waits == nil { 555 procs.waits = make(map[int]chan *waitErr) 556 } 557 procs.waits[ret.pid] = waitc 558 procs.Unlock() 559 560 forkc <- ret 561 562 var w waitErr 563 for w.err == nil && w.Pid != ret.pid { 564 w.err = Await(&w.Waitmsg) 565 } 566 waitc <- &w 567 close(waitc) 568 }() 569 ret := <-forkc 570 return ret.pid, ret.err 571 } 572 573 // Combination of fork and exec, careful to be thread safe. 574 func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 575 return startProcess(argv0, argv, attr) 576 } 577 578 // StartProcess wraps ForkExec for package os. 579 func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) { 580 pid, err = startProcess(argv0, argv, attr) 581 return pid, 0, err 582 } 583 584 // Ordinary exec. 585 func Exec(argv0 string, argv []string, envv []string) (err error) { 586 if envv != nil { 587 r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0) 588 if int32(r1) == -1 { 589 return NewError(errstr()) 590 } 591 592 for _, v := range envv { 593 i := 0 594 for i < len(v) && v[i] != '=' { 595 i++ 596 } 597 598 fd, e := Create("/env/"+v[:i], O_WRONLY, 0666) 599 if e != nil { 600 return e 601 } 602 603 _, e = Write(fd, []byte(v[i+1:])) 604 if e != nil { 605 Close(fd) 606 return e 607 } 608 Close(fd) 609 } 610 } 611 612 argv0p, err := BytePtrFromString(argv0) 613 if err != nil { 614 return err 615 } 616 argvp, err := SlicePtrFromStrings(argv) 617 if err != nil { 618 return err 619 } 620 _, _, e1 := Syscall(SYS_EXEC, 621 uintptr(unsafe.Pointer(argv0p)), 622 uintptr(unsafe.Pointer(&argvp[0])), 623 0) 624 625 return e1 626 } 627 628 // WaitProcess waits until the pid of a 629 // running process is found in the queue of 630 // wait messages. It is used in conjunction 631 // with ForkExec/StartProcess to wait for a 632 // running process to exit. 
633 func WaitProcess(pid int, w *Waitmsg) (err error) { 634 procs.Lock() 635 ch := procs.waits[pid] 636 procs.Unlock() 637 638 var wmsg *waitErr 639 if ch != nil { 640 wmsg = <-ch 641 procs.Lock() 642 if procs.waits[pid] == ch { 643 delete(procs.waits, pid) 644 } 645 procs.Unlock() 646 } 647 if wmsg == nil { 648 // ch was missing or ch is closed 649 return NewError("process not found") 650 } 651 if wmsg.err != nil { 652 return wmsg.err 653 } 654 if w != nil { 655 *w = wmsg.Waitmsg 656 } 657 return nil 658 }