github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/syscall/exec_plan9.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Fork, exec, wait, etc.
     6  
     7  package syscall
     8  
     9  import (
    10  	"runtime"
    11  	"sync"
    12  	"unsafe"
    13  )
    14  
    15  // Lock synchronizing creation of new file descriptors with fork.
    16  //
    17  // We want the child in a fork/exec sequence to inherit only the
    18  // file descriptors we intend.  To do that, we mark all file
    19  // descriptors close-on-exec and then, in the child, explicitly
    20  // unmark the ones we want the exec'ed program to keep.
    21  // Unix doesn't make this easy: there is, in general, no way to
    22  // allocate a new file descriptor close-on-exec.  Instead you
    23  // have to allocate the descriptor and then mark it close-on-exec.
    24  // If a fork happens between those two events, the child's exec
    25  // will inherit an unwanted file descriptor.
    26  //
    27  // This lock solves that race: the create new fd/mark close-on-exec
    28  // operation is done holding ForkLock for reading, and the fork itself
    29  // is done holding ForkLock for writing.  At least, that's the idea.
    30  // There are some complications.
    31  //
    32  // Some system calls that create new file descriptors can block
    33  // for arbitrarily long times: open on a hung NFS server or named
    34  // pipe, accept on a socket, and so on.  We can't reasonably grab
    35  // the lock across those operations.
    36  //
    37  // It is worse to inherit some file descriptors than others.
    38  // If a non-malicious child accidentally inherits an open ordinary file,
    39  // that's not a big deal.  On the other hand, if a long-lived child
    40  // accidentally inherits the write end of a pipe, then the reader
    41  // of that pipe will not see EOF until that child exits, potentially
    42  // causing the parent program to hang.  This is a common problem
    43  // in threaded C programs that use popen.
    44  //
    45  // Luckily, the file descriptors that are most important not to
    46  // inherit are not the ones that can take an arbitrarily long time
    47  // to create: pipe returns instantly, and the net package uses
    48  // non-blocking I/O to accept on a listening socket.
    49  // The rules for which file descriptor-creating operations use the
    50  // ForkLock are as follows:
    51  //
    52  // 1) Pipe.    Does not block.  Use the ForkLock.
    53  // 2) Socket.  Does not block.  Use the ForkLock.
    54  // 3) Accept.  If using non-blocking mode, use the ForkLock.
    55  //             Otherwise, live with the race.
    56  // 4) Open.    Can block.  Use O_CLOEXEC if available (Linux).
    57  //             Otherwise, live with the race.
    58  // 5) Dup.     Does not block.  Use the ForkLock.
    59  //             On Linux, could use fcntl F_DUPFD_CLOEXEC
    60  //             instead of the ForkLock, but only for dup(fd, -1).
    61  
// ForkLock synchronizes the creation of new file descriptors with fork.
// In this file, forkExec holds it for writing while it lists the open
// descriptors and forks the child.
var ForkLock sync.RWMutex
    63  
    64  // StringSlicePtr converts a slice of strings to a slice of pointers
    65  // to NUL-terminated byte arrays. If any string contains a NUL byte
    66  // this function panics instead of returning an error.
    67  //
    68  // Deprecated: Use SlicePtrFromStrings instead.
    69  func StringSlicePtr(ss []string) []*byte {
    70  	bb := make([]*byte, len(ss)+1)
    71  	for i := 0; i < len(ss); i++ {
    72  		bb[i] = StringBytePtr(ss[i])
    73  	}
    74  	bb[len(ss)] = nil
    75  	return bb
    76  }
    77  
    78  // SlicePtrFromStrings converts a slice of strings to a slice of
    79  // pointers to NUL-terminated byte arrays. If any string contains
    80  // a NUL byte, it returns (nil, EINVAL).
    81  func SlicePtrFromStrings(ss []string) ([]*byte, error) {
    82  	var err error
    83  	bb := make([]*byte, len(ss)+1)
    84  	for i := 0; i < len(ss); i++ {
    85  		bb[i], err = BytePtrFromString(ss[i])
    86  		if err != nil {
    87  			return nil, err
    88  		}
    89  	}
    90  	bb[len(ss)] = nil
    91  	return bb, nil
    92  }
    93  
    94  // readdirnames returns the names of files inside the directory represented by dirfd.
    95  func readdirnames(dirfd int) (names []string, err error) {
    96  	names = make([]string, 0, 100)
    97  	var buf [STATMAX]byte
    98  
    99  	for {
   100  		n, e := Read(dirfd, buf[:])
   101  		if e != nil {
   102  			return nil, e
   103  		}
   104  		if n == 0 {
   105  			break
   106  		}
   107  		for i := 0; i < n; {
   108  			m, _ := gbit16(buf[i:])
   109  			m += 2
   110  
   111  			if m < STATFIXLEN {
   112  				return nil, ErrBadStat
   113  			}
   114  
   115  			s, _, ok := gstring(buf[i+41:])
   116  			if !ok {
   117  				return nil, ErrBadStat
   118  			}
   119  			names = append(names, s)
   120  			i += int(m)
   121  		}
   122  	}
   123  	return
   124  }
   125  
   126  // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d.
   127  // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read.
   128  func readdupdevice() (fds []int, err error) {
   129  	dupdevfd, err := Open("#d", O_RDONLY)
   130  	if err != nil {
   131  		return
   132  	}
   133  	defer Close(dupdevfd)
   134  
   135  	names, err := readdirnames(dupdevfd)
   136  	if err != nil {
   137  		return
   138  	}
   139  
   140  	fds = make([]int, 0, len(names)/2)
   141  	for _, name := range names {
   142  		if n := len(name); n > 3 && name[n-3:n] == "ctl" {
   143  			continue
   144  		}
   145  		fd := int(atoi([]byte(name)))
   146  		switch fd {
   147  		case 0, 1, 2, dupdevfd:
   148  			continue
   149  		}
   150  		fds = append(fds, fd)
   151  	}
   152  	return
   153  }
   154  
// startupFds holds the descriptors reported by readdupdevice when this
// package was initialized.
var startupFds []int

// Plan 9 does not allow clearing the OCEXEC flag
// from the underlying channel backing an open file descriptor,
// therefore we store a list of already opened file descriptors
// inside startupFds and skip them when manually closing descriptors
// not meant to be passed to a child exec.
//
// The error from readdupdevice is discarded: if the dup device cannot
// be read, startupFds stays nil.
func init() {
	startupFds, _ = readdupdevice()
}
   165  
   166  // forkAndExecInChild forks the process, calling dup onto 0..len(fd)
   167  // and finally invoking exec(argv0, argvv, envv) in the child.
   168  // If a dup or exec fails, it writes the error string to pipe.
   169  // (The pipe write end is close-on-exec so if exec succeeds, it will be closed.)
   170  //
   171  // In the child, this function must not acquire any locks, because
   172  // they might have been locked at the time of the fork.  This means
   173  // no rescheduling, no malloc calls, and no new stack segments.
   174  // The calls to RawSyscall are okay because they are assembly
   175  // functions that do not grow the stack.
   176  func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) {
   177  	// Declare all variables at top in case any
   178  	// declarations require heap allocation (e.g., errbuf).
   179  	var (
   180  		r1       uintptr
   181  		nextfd   int
   182  		i        int
   183  		clearenv int
   184  		envfd    int
   185  		errbuf   [ERRMAX]byte
   186  	)
   187  
   188  	// Guard against side effects of shuffling fds below.
   189  	// Make sure that nextfd is beyond any currently open files so
   190  	// that we can't run the risk of overwriting any of them.
   191  	fd := make([]int, len(attr.Files))
   192  	nextfd = len(attr.Files)
   193  	for i, ufd := range attr.Files {
   194  		if nextfd < int(ufd) {
   195  			nextfd = int(ufd)
   196  		}
   197  		fd[i] = int(ufd)
   198  	}
   199  	nextfd++
   200  
   201  	if envv != nil {
   202  		clearenv = RFCENVG
   203  	}
   204  
   205  	// About to call fork.
   206  	// No more allocation or calls of non-assembly functions.
   207  	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0)
   208  
   209  	if r1 != 0 {
   210  		if int32(r1) == -1 {
   211  			return 0, NewError(errstr())
   212  		}
   213  		// parent; return PID
   214  		return int(r1), nil
   215  	}
   216  
   217  	// Fork succeeded, now in child.
   218  
   219  	// Close fds we don't need.
   220  	for i = 0; i < len(fdsToClose); i++ {
   221  		r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0)
   222  		if int32(r1) == -1 {
   223  			goto childerror
   224  		}
   225  	}
   226  
   227  	if envv != nil {
   228  		// Write new environment variables.
   229  		for i = 0; i < len(envv); i++ {
   230  			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666))
   231  
   232  			if int32(r1) == -1 {
   233  				goto childerror
   234  			}
   235  
   236  			envfd = int(r1)
   237  
   238  			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue),
   239  				^uintptr(0), ^uintptr(0), 0)
   240  
   241  			if int32(r1) == -1 || int(r1) != envv[i].nvalue {
   242  				goto childerror
   243  			}
   244  
   245  			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0)
   246  
   247  			if int32(r1) == -1 {
   248  				goto childerror
   249  			}
   250  		}
   251  	}
   252  
   253  	// Chdir
   254  	if dir != nil {
   255  		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
   256  		if int32(r1) == -1 {
   257  			goto childerror
   258  		}
   259  	}
   260  
   261  	// Pass 1: look for fd[i] < i and move those up above len(fd)
   262  	// so that pass 2 won't stomp on an fd it needs later.
   263  	if pipe < nextfd {
   264  		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0)
   265  		if int32(r1) == -1 {
   266  			goto childerror
   267  		}
   268  		pipe = nextfd
   269  		nextfd++
   270  	}
   271  	for i = 0; i < len(fd); i++ {
   272  		if fd[i] >= 0 && fd[i] < int(i) {
   273  			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0)
   274  			if int32(r1) == -1 {
   275  				goto childerror
   276  			}
   277  
   278  			fd[i] = nextfd
   279  			nextfd++
   280  			if nextfd == pipe { // don't stomp on pipe
   281  				nextfd++
   282  			}
   283  		}
   284  	}
   285  
   286  	// Pass 2: dup fd[i] down onto i.
   287  	for i = 0; i < len(fd); i++ {
   288  		if fd[i] == -1 {
   289  			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
   290  			continue
   291  		}
   292  		if fd[i] == int(i) {
   293  			continue
   294  		}
   295  		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0)
   296  		if int32(r1) == -1 {
   297  			goto childerror
   298  		}
   299  	}
   300  
   301  	// Pass 3: close fd[i] if it was moved in the previous pass.
   302  	for i = 0; i < len(fd); i++ {
   303  		if fd[i] >= 0 && fd[i] != int(i) {
   304  			RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0)
   305  		}
   306  	}
   307  
   308  	// Time to exec.
   309  	r1, _, _ = RawSyscall(SYS_EXEC,
   310  		uintptr(unsafe.Pointer(argv0)),
   311  		uintptr(unsafe.Pointer(&argv[0])), 0)
   312  
   313  childerror:
   314  	// send error string on pipe
   315  	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0)
   316  	errbuf[len(errbuf)-1] = 0
   317  	i = 0
   318  	for i < len(errbuf) && errbuf[i] != 0 {
   319  		i++
   320  	}
   321  
   322  	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i),
   323  		^uintptr(0), ^uintptr(0), 0)
   324  
   325  	for {
   326  		RawSyscall(SYS_EXITS, 0, 0, 0)
   327  	}
   328  
   329  	// Calling panic is not actually safe,
   330  	// but the for loop above won't break
   331  	// and this shuts up the compiler.
   332  	panic("unreached")
   333  }
   334  
   335  func cexecPipe(p []int) error {
   336  	e := Pipe(p)
   337  	if e != nil {
   338  		return e
   339  	}
   340  
   341  	fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC)
   342  	if e != nil {
   343  		Close(p[0])
   344  		Close(p[1])
   345  		return e
   346  	}
   347  
   348  	Close(fd)
   349  	return nil
   350  }
   351  
// envItem is one environment variable prepared ahead of the fork so the
// child can write it without allocating.
type envItem struct {
	name   *byte // NUL-terminated path of the variable's file ("/env/" + name)
	value  *byte // first byte of the buffer holding the value
	nvalue int   // number of bytes of value written by the child
}

// ProcAttr holds the attributes applied to a new process started by
// ForkExec or StartProcess.
type ProcAttr struct {
	Dir   string    // Current working directory.
	Env   []string  // Environment.
	Files []uintptr // File descriptors.
	Sys   *SysProcAttr
}

// SysProcAttr holds the Plan 9 specific process attributes.
type SysProcAttr struct {
	Rfork int // additional flags to pass to rfork
}

// zeroProcAttr and zeroSysProcAttr are the defaults forkExec uses when
// the caller passes a nil *ProcAttr or leaves ProcAttr.Sys nil.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
   371  
   372  func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   373  	var (
   374  		p      [2]int
   375  		n      int
   376  		errbuf [ERRMAX]byte
   377  		wmsg   Waitmsg
   378  	)
   379  
   380  	if attr == nil {
   381  		attr = &zeroProcAttr
   382  	}
   383  	sys := attr.Sys
   384  	if sys == nil {
   385  		sys = &zeroSysProcAttr
   386  	}
   387  
   388  	p[0] = -1
   389  	p[1] = -1
   390  
   391  	// Convert args to C form.
   392  	argv0p, err := BytePtrFromString(argv0)
   393  	if err != nil {
   394  		return 0, err
   395  	}
   396  	argvp, err := SlicePtrFromStrings(argv)
   397  	if err != nil {
   398  		return 0, err
   399  	}
   400  
   401  	destDir := attr.Dir
   402  	if destDir == "" {
   403  		wdmu.Lock()
   404  		destDir = wdStr
   405  		wdmu.Unlock()
   406  	}
   407  	var dir *byte
   408  	if destDir != "" {
   409  		dir, err = BytePtrFromString(destDir)
   410  		if err != nil {
   411  			return 0, err
   412  		}
   413  	}
   414  	var envvParsed []envItem
   415  	if attr.Env != nil {
   416  		envvParsed = make([]envItem, 0, len(attr.Env))
   417  		for _, v := range attr.Env {
   418  			i := 0
   419  			for i < len(v) && v[i] != '=' {
   420  				i++
   421  			}
   422  
   423  			envname, err := BytePtrFromString("/env/" + v[:i])
   424  			if err != nil {
   425  				return 0, err
   426  			}
   427  			envvalue := make([]byte, len(v)-i)
   428  			copy(envvalue, v[i+1:])
   429  			envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i})
   430  		}
   431  	}
   432  
   433  	// Acquire the fork lock to prevent other threads from creating new fds before we fork.
   434  	ForkLock.Lock()
   435  
   436  	// get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child.
   437  	// no new fds can be created while we hold the ForkLock for writing.
   438  	openFds, e := readdupdevice()
   439  	if e != nil {
   440  		ForkLock.Unlock()
   441  		return 0, e
   442  	}
   443  
   444  	fdsToClose := make([]int, 0, len(openFds))
   445  	for _, fd := range openFds {
   446  		doClose := true
   447  
   448  		// exclude files opened at startup.
   449  		for _, sfd := range startupFds {
   450  			if fd == sfd {
   451  				doClose = false
   452  				break
   453  			}
   454  		}
   455  
   456  		// exclude files explicitly requested by the caller.
   457  		for _, rfd := range attr.Files {
   458  			if fd == int(rfd) {
   459  				doClose = false
   460  				break
   461  			}
   462  		}
   463  
   464  		if doClose {
   465  			fdsToClose = append(fdsToClose, fd)
   466  		}
   467  	}
   468  
   469  	// Allocate child status pipe close on exec.
   470  	e = cexecPipe(p[:])
   471  
   472  	if e != nil {
   473  		return 0, e
   474  	}
   475  	fdsToClose = append(fdsToClose, p[0])
   476  
   477  	// Kick off child.
   478  	pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork)
   479  
   480  	if err != nil {
   481  		if p[0] >= 0 {
   482  			Close(p[0])
   483  			Close(p[1])
   484  		}
   485  		ForkLock.Unlock()
   486  		return 0, err
   487  	}
   488  	ForkLock.Unlock()
   489  
   490  	// Read child error status from pipe.
   491  	Close(p[1])
   492  	n, err = Read(p[0], errbuf[:])
   493  	Close(p[0])
   494  
   495  	if err != nil || n != 0 {
   496  		if n != 0 {
   497  			err = NewError(string(errbuf[:n]))
   498  		}
   499  
   500  		// Child failed; wait for it to exit, to make sure
   501  		// the zombies don't accumulate.
   502  		for wmsg.Pid != pid {
   503  			Await(&wmsg)
   504  		}
   505  		return 0, err
   506  	}
   507  
   508  	// Read got EOF, so pipe closed on exec, so exec succeeded.
   509  	return pid, nil
   510  }
   511  
// waitErr pairs the wait message collected for a child with the error,
// if any, returned by Await while collecting it.
type waitErr struct {
	Waitmsg
	err error
}

// procs maps the pid of each started process to the channel on which
// its wait result is delivered. The embedded mutex guards the map.
var procs struct {
	sync.Mutex
	waits map[int]chan *waitErr
}
   521  
   522  // startProcess starts a new goroutine, tied to the OS
   523  // thread, which runs the process and subsequently waits
   524  // for it to finish, communicating the process stats back
   525  // to any goroutines that may have been waiting on it.
   526  //
   527  // Such a dedicated goroutine is needed because on
   528  // Plan 9, only the parent thread can wait for a child,
   529  // whereas goroutines tend to jump OS threads (e.g.,
   530  // between starting a process and running Wait(), the
   531  // goroutine may have been rescheduled).
   532  func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   533  	type forkRet struct {
   534  		pid int
   535  		err error
   536  	}
   537  
   538  	forkc := make(chan forkRet, 1)
   539  	go func() {
   540  		runtime.LockOSThread()
   541  		var ret forkRet
   542  
   543  		ret.pid, ret.err = forkExec(argv0, argv, attr)
   544  		// If fork fails there is nothing to wait for.
   545  		if ret.err != nil || ret.pid == 0 {
   546  			forkc <- ret
   547  			return
   548  		}
   549  
   550  		waitc := make(chan *waitErr, 1)
   551  
   552  		// Mark that the process is running.
   553  		procs.Lock()
   554  		if procs.waits == nil {
   555  			procs.waits = make(map[int]chan *waitErr)
   556  		}
   557  		procs.waits[ret.pid] = waitc
   558  		procs.Unlock()
   559  
   560  		forkc <- ret
   561  
   562  		var w waitErr
   563  		for w.err == nil && w.Pid != ret.pid {
   564  			w.err = Await(&w.Waitmsg)
   565  		}
   566  		waitc <- &w
   567  		close(waitc)
   568  	}()
   569  	ret := <-forkc
   570  	return ret.pid, ret.err
   571  }
   572  
   573  // Combination of fork and exec, careful to be thread safe.
   574  func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   575  	return startProcess(argv0, argv, attr)
   576  }
   577  
   578  // StartProcess wraps ForkExec for package os.
   579  func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
   580  	pid, err = startProcess(argv0, argv, attr)
   581  	return pid, 0, err
   582  }
   583  
   584  // Ordinary exec.
   585  func Exec(argv0 string, argv []string, envv []string) (err error) {
   586  	if envv != nil {
   587  		r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0)
   588  		if int32(r1) == -1 {
   589  			return NewError(errstr())
   590  		}
   591  
   592  		for _, v := range envv {
   593  			i := 0
   594  			for i < len(v) && v[i] != '=' {
   595  				i++
   596  			}
   597  
   598  			fd, e := Create("/env/"+v[:i], O_WRONLY, 0666)
   599  			if e != nil {
   600  				return e
   601  			}
   602  
   603  			_, e = Write(fd, []byte(v[i+1:]))
   604  			if e != nil {
   605  				Close(fd)
   606  				return e
   607  			}
   608  			Close(fd)
   609  		}
   610  	}
   611  
   612  	argv0p, err := BytePtrFromString(argv0)
   613  	if err != nil {
   614  		return err
   615  	}
   616  	argvp, err := SlicePtrFromStrings(argv)
   617  	if err != nil {
   618  		return err
   619  	}
   620  	_, _, e1 := Syscall(SYS_EXEC,
   621  		uintptr(unsafe.Pointer(argv0p)),
   622  		uintptr(unsafe.Pointer(&argvp[0])),
   623  		0)
   624  
   625  	return e1
   626  }
   627  
   628  // WaitProcess waits until the pid of a
   629  // running process is found in the queue of
   630  // wait messages. It is used in conjunction
   631  // with ForkExec/StartProcess to wait for a
   632  // running process to exit.
   633  func WaitProcess(pid int, w *Waitmsg) (err error) {
   634  	procs.Lock()
   635  	ch := procs.waits[pid]
   636  	procs.Unlock()
   637  
   638  	var wmsg *waitErr
   639  	if ch != nil {
   640  		wmsg = <-ch
   641  		procs.Lock()
   642  		if procs.waits[pid] == ch {
   643  			delete(procs.waits, pid)
   644  		}
   645  		procs.Unlock()
   646  	}
   647  	if wmsg == nil {
   648  		// ch was missing or ch is closed
   649  		return NewError("process not found")
   650  	}
   651  	if wmsg.err != nil {
   652  		return wmsg.err
   653  	}
   654  	if w != nil {
   655  		*w = wmsg.Waitmsg
   656  	}
   657  	return nil
   658  }