github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/syscall/exec_plan9.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Fork, exec, wait, etc.
     6  
     7  package syscall
     8  
     9  import (
    10  	"runtime"
    11  	"sync"
    12  	"unsafe"
    13  )
    14  
    15  // Lock synchronizing creation of new file descriptors with fork.
    16  //
    17  // We want the child in a fork/exec sequence to inherit only the
    18  // file descriptors we intend.  To do that, we mark all file
    19  // descriptors close-on-exec and then, in the child, explicitly
    20  // unmark the ones we want the exec'ed program to keep.
    21  // Unix doesn't make this easy: there is, in general, no way to
    22  // allocate a new file descriptor close-on-exec.  Instead you
    23  // have to allocate the descriptor and then mark it close-on-exec.
    24  // If a fork happens between those two events, the child's exec
    25  // will inherit an unwanted file descriptor.
    26  //
    27  // This lock solves that race: the create new fd/mark close-on-exec
    28  // operation is done holding ForkLock for reading, and the fork itself
    29  // is done holding ForkLock for writing.  At least, that's the idea.
    30  // There are some complications.
    31  //
    32  // Some system calls that create new file descriptors can block
    33  // for arbitrarily long times: open on a hung NFS server or named
    34  // pipe, accept on a socket, and so on.  We can't reasonably grab
    35  // the lock across those operations.
    36  //
    37  // It is worse to inherit some file descriptors than others.
    38  // If a non-malicious child accidentally inherits an open ordinary file,
    39  // that's not a big deal.  On the other hand, if a long-lived child
    40  // accidentally inherits the write end of a pipe, then the reader
    41  // of that pipe will not see EOF until that child exits, potentially
    42  // causing the parent program to hang.  This is a common problem
    43  // in threaded C programs that use popen.
    44  //
    45  // Luckily, the file descriptors that are most important not to
    46  // inherit are not the ones that can take an arbitrarily long time
    47  // to create: pipe returns instantly, and the net package uses
    48  // non-blocking I/O to accept on a listening socket.
    49  // The rules for which file descriptor-creating operations use the
    50  // ForkLock are as follows:
    51  //
    52  // 1) Pipe.    Does not block.  Use the ForkLock.
    53  // 2) Socket.  Does not block.  Use the ForkLock.
    54  // 3) Accept.  If using non-blocking mode, use the ForkLock.
    55  //             Otherwise, live with the race.
    56  // 4) Open.    Can block.  Use O_CLOEXEC if available (Linux).
    57  //             Otherwise, live with the race.
    58  // 5) Dup.     Does not block.  Use the ForkLock.
    59  //             On Linux, could use fcntl F_DUPFD_CLOEXEC
    60  //             instead of the ForkLock, but only for dup(fd, -1).
    61  
// ForkLock is the lock described in the comment above. In this file,
// forkExec holds it for writing from just before it lists the open file
// descriptors until the fork has been attempted.
var ForkLock sync.RWMutex
    63  
    64  // StringSlicePtr is deprecated. Use SlicePtrFromStrings instead.
    65  // If any string contains a NUL byte this function panics instead
    66  // of returning an error.
    67  func StringSlicePtr(ss []string) []*byte {
    68  	bb := make([]*byte, len(ss)+1)
    69  	for i := 0; i < len(ss); i++ {
    70  		bb[i] = StringBytePtr(ss[i])
    71  	}
    72  	bb[len(ss)] = nil
    73  	return bb
    74  }
    75  
    76  // SlicePtrFromStrings converts a slice of strings to a slice of
    77  // pointers to NUL-terminated byte slices. If any string contains
    78  // a NUL byte, it returns (nil, EINVAL).
    79  func SlicePtrFromStrings(ss []string) ([]*byte, error) {
    80  	var err error
    81  	bb := make([]*byte, len(ss)+1)
    82  	for i := 0; i < len(ss); i++ {
    83  		bb[i], err = BytePtrFromString(ss[i])
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  	}
    88  	bb[len(ss)] = nil
    89  	return bb, nil
    90  }
    91  
    92  // readdirnames returns the names of files inside the directory represented by dirfd.
    93  func readdirnames(dirfd int) (names []string, err error) {
    94  	names = make([]string, 0, 100)
    95  	var buf [STATMAX]byte
    96  
    97  	for {
    98  		n, e := Read(dirfd, buf[:])
    99  		if e != nil {
   100  			return nil, e
   101  		}
   102  		if n == 0 {
   103  			break
   104  		}
   105  		for i := 0; i < n; {
   106  			m, _ := gbit16(buf[i:])
   107  			m += 2
   108  
   109  			if m < STATFIXLEN {
   110  				return nil, ErrBadStat
   111  			}
   112  
   113  			s, _, ok := gstring(buf[i+41:])
   114  			if !ok {
   115  				return nil, ErrBadStat
   116  			}
   117  			names = append(names, s)
   118  			i += int(m)
   119  		}
   120  	}
   121  	return
   122  }
   123  
   124  // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d.
   125  // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read.
   126  func readdupdevice() (fds []int, err error) {
   127  	dupdevfd, err := Open("#d", O_RDONLY)
   128  	if err != nil {
   129  		return
   130  	}
   131  	defer Close(dupdevfd)
   132  
   133  	names, err := readdirnames(dupdevfd)
   134  	if err != nil {
   135  		return
   136  	}
   137  
   138  	fds = make([]int, 0, len(names)/2)
   139  	for _, name := range names {
   140  		if n := len(name); n > 3 && name[n-3:n] == "ctl" {
   141  			continue
   142  		}
   143  		fd := int(atoi([]byte(name)))
   144  		switch fd {
   145  		case 0, 1, 2, dupdevfd:
   146  			continue
   147  		}
   148  		fds = append(fds, fd)
   149  	}
   150  	return
   151  }
   152  
   153  var startupFds []int
   154  
   155  // Plan 9 does not allow clearing the OCEXEC flag
   156  // from the underlying channel backing an open file descriptor,
   157  // therefore we store a list of already opened file descriptors
   158  // inside startupFds and skip them when manually closing descriptors
   159  // not meant to be passed to a child exec.
   160  func init() {
   161  	startupFds, _ = readdupdevice()
   162  }
   163  
// forkAndExecInChild forks the process, calling dup onto 0..len(fd)
// and finally invoking exec(argv0, argv) in the child.
// If a dup or exec fails, it writes the error string to pipe.
// (The pipe write end is close-on-exec so if exec succeeds, it will be closed.)
//
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork.  This means
// no rescheduling, no malloc calls, and no new stack segments.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
//
// Parameters:
//   - argv0, argv: program path and argument vector passed to exec.
//   - envv: when non-nil, RFCENVG is added to the rfork flags and every
//     item is written to the file it names before exec.
//   - dir: when non-nil, the child changes to this directory.
//   - attr: only attr.Files is used here; Files[i] is moved onto
//     descriptor i in the child, and an entry that converts to -1 causes
//     descriptor i to be closed instead.
//   - fdsToClose: descriptors closed in the child right after the fork.
//   - pipe: descriptor on which the child reports a failure.
//   - rflag: extra flags OR-ed into the rfork call.
//
// In the parent it returns the child's pid, or an error built from
// errstr() when rfork fails. The child never returns: it either execs
// successfully or writes its error string to pipe and exits.
func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., errbuf).
	var (
		r1       uintptr
		nextfd   int
		i        int
		clearenv int
		envfd    int
		errbuf   [ERRMAX]byte
	)

	// Guard against side effects of shuffling fds below.
	// Make sure that nextfd is beyond any currently open files so
	// that we can't run the risk of overwriting any of them.
	// fd is a private int copy of attr.Files, made before the fork because
	// it allocates. Note that the range loop declares its own i, which
	// shadows the i declared above.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// A non-nil envv (even an empty one) requests RFCENVG from rfork.
	if envv != nil {
		clearenv = RFCENVG
	}

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0)

	// A non-zero result means we are in the parent (or the fork failed).
	if r1 != 0 {
		if int32(r1) == -1 {
			return 0, NewError(errstr())
		}
		// parent; return PID
		return int(r1), nil
	}

	// Fork succeeded, now in child.

	// Close fds we don't need.
	for i = 0; i < len(fdsToClose); i++ {
		r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	if envv != nil {
		// Write new environment variables.
		// Each item is created at the path envv[i].name, filled with
		// nvalue bytes starting at envv[i].value, and closed again.
		for i = 0; i < len(envv); i++ {
			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666))

			if int32(r1) == -1 {
				goto childerror
			}

			envfd = int(r1)

			// The offset arguments are all ones (-1).
			// NOTE(review): presumably this selects the current file
			// offset - confirm against pwrite(2).
			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue),
				^uintptr(0), ^uintptr(0), 0)

			// A short write is treated as a failure too.
			if int32(r1) == -1 || int(r1) != envv[i].nvalue {
				goto childerror
			}

			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0)

			if int32(r1) == -1 {
				goto childerror
			}
		}
	}

	// Chdir
	if dir != nil {
		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	// The error pipe is moved out of the way first if it sits below nextfd.
	if pipe < nextfd {
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0)
		if int32(r1) == -1 {
			goto childerror
		}
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < int(i) {
			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0)
			if int32(r1) == -1 {
				goto childerror
			}

			fd[i] = nextfd
			nextfd++
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
		}
	}

	// Pass 2: dup fd[i] down onto i.
	for i = 0; i < len(fd); i++ {
		if fd[i] == -1 {
			// No descriptor requested for slot i: make sure it is closed.
			// The result of the close is not checked.
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == int(i) {
			// Already in place.
			continue
		}
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 3: close fd[i] if it was moved in the previous pass.
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] != int(i) {
			RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0)
		}
	}

	// Time to exec.
	// If exec returns at all, control falls through to childerror below.
	r1, _, _ = RawSyscall(SYS_EXEC,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])), 0)

childerror:
	// send error string on pipe
	// Fetch the error string into errbuf, force NUL termination, and
	// measure its length so that only the text itself is written.
	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0)
	errbuf[len(errbuf)-1] = 0
	i = 0
	for i < len(errbuf) && errbuf[i] != 0 {
		i++
	}

	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i),
		^uintptr(0), ^uintptr(0), 0)

	// Keep asking to exit; the child must never return into Go code.
	for {
		RawSyscall(SYS_EXITS, 0, 0, 0)
	}

	// Calling panic is not actually safe,
	// but the for loop above won't break
	// and this shuts up the compiler.
	panic("unreached")
}
   332  
   333  func cexecPipe(p []int) error {
   334  	e := Pipe(p)
   335  	if e != nil {
   336  		return e
   337  	}
   338  
   339  	fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC)
   340  	if e != nil {
   341  		Close(p[0])
   342  		Close(p[1])
   343  		return e
   344  	}
   345  
   346  	Close(fd)
   347  	return nil
   348  }
   349  
// envItem is one environment variable prepared by forkExec before the
// fork, so that the child can write it without allocating.
type envItem struct {
	name   *byte // NUL-terminated path of the variable's file, "/env/" + name
	value  *byte // first byte of the buffer holding the value
	nvalue int   // number of bytes the child writes from value
}
   355  
// ProcAttr holds the attributes applied to a new process started by
// ForkExec or StartProcess. A nil *ProcAttr is treated as the zero value.
type ProcAttr struct {
	Dir   string    // Current working directory.
	Env   []string  // Environment; nil leaves the environment alone, otherwise "name=value" entries.
	Files []uintptr // File descriptors; Files[i] becomes descriptor i in the child.
	Sys   *SysProcAttr
}
   362  
// SysProcAttr holds the system-specific attributes of a new process.
// A nil *SysProcAttr is treated as the zero value.
type SysProcAttr struct {
	Rfork int // additional flags to pass to rfork
}
   366  
// zeroProcAttr and zeroSysProcAttr are the zero values forkExec
// substitutes when the caller passes a nil attr or a nil attr.Sys.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
   369  
   370  func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   371  	var (
   372  		p      [2]int
   373  		n      int
   374  		errbuf [ERRMAX]byte
   375  		wmsg   Waitmsg
   376  	)
   377  
   378  	if attr == nil {
   379  		attr = &zeroProcAttr
   380  	}
   381  	sys := attr.Sys
   382  	if sys == nil {
   383  		sys = &zeroSysProcAttr
   384  	}
   385  
   386  	p[0] = -1
   387  	p[1] = -1
   388  
   389  	// Convert args to C form.
   390  	argv0p, err := BytePtrFromString(argv0)
   391  	if err != nil {
   392  		return 0, err
   393  	}
   394  	argvp, err := SlicePtrFromStrings(argv)
   395  	if err != nil {
   396  		return 0, err
   397  	}
   398  
   399  	destDir := attr.Dir
   400  	if destDir == "" {
   401  		wdmu.Lock()
   402  		destDir = wdStr
   403  		wdmu.Unlock()
   404  	}
   405  	var dir *byte
   406  	if destDir != "" {
   407  		dir, err = BytePtrFromString(destDir)
   408  		if err != nil {
   409  			return 0, err
   410  		}
   411  	}
   412  	var envvParsed []envItem
   413  	if attr.Env != nil {
   414  		envvParsed = make([]envItem, 0, len(attr.Env))
   415  		for _, v := range attr.Env {
   416  			i := 0
   417  			for i < len(v) && v[i] != '=' {
   418  				i++
   419  			}
   420  
   421  			envname, err := BytePtrFromString("/env/" + v[:i])
   422  			if err != nil {
   423  				return 0, err
   424  			}
   425  			envvalue := make([]byte, len(v)-i)
   426  			copy(envvalue, v[i+1:])
   427  			envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i})
   428  		}
   429  	}
   430  
   431  	// Acquire the fork lock to prevent other threads from creating new fds before we fork.
   432  	ForkLock.Lock()
   433  
   434  	// get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child.
   435  	// no new fds can be created while we hold the ForkLock for writing.
   436  	openFds, e := readdupdevice()
   437  	if e != nil {
   438  		ForkLock.Unlock()
   439  		return 0, e
   440  	}
   441  
   442  	fdsToClose := make([]int, 0, len(openFds))
   443  	for _, fd := range openFds {
   444  		doClose := true
   445  
   446  		// exclude files opened at startup.
   447  		for _, sfd := range startupFds {
   448  			if fd == sfd {
   449  				doClose = false
   450  				break
   451  			}
   452  		}
   453  
   454  		// exclude files explicitly requested by the caller.
   455  		for _, rfd := range attr.Files {
   456  			if fd == int(rfd) {
   457  				doClose = false
   458  				break
   459  			}
   460  		}
   461  
   462  		if doClose {
   463  			fdsToClose = append(fdsToClose, fd)
   464  		}
   465  	}
   466  
   467  	// Allocate child status pipe close on exec.
   468  	e = cexecPipe(p[:])
   469  
   470  	if e != nil {
   471  		return 0, e
   472  	}
   473  	fdsToClose = append(fdsToClose, p[0])
   474  
   475  	// Kick off child.
   476  	pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork)
   477  
   478  	if err != nil {
   479  		if p[0] >= 0 {
   480  			Close(p[0])
   481  			Close(p[1])
   482  		}
   483  		ForkLock.Unlock()
   484  		return 0, err
   485  	}
   486  	ForkLock.Unlock()
   487  
   488  	// Read child error status from pipe.
   489  	Close(p[1])
   490  	n, err = Read(p[0], errbuf[:])
   491  	Close(p[0])
   492  
   493  	if err != nil || n != 0 {
   494  		if n != 0 {
   495  			err = NewError(string(errbuf[:n]))
   496  		}
   497  
   498  		// Child failed; wait for it to exit, to make sure
   499  		// the zombies don't accumulate.
   500  		for wmsg.Pid != pid {
   501  			Await(&wmsg)
   502  		}
   503  		return 0, err
   504  	}
   505  
   506  	// Read got EOF, so pipe closed on exec, so exec succeeded.
   507  	return pid, nil
   508  }
   509  
// waitErr is the result handed from the goroutine started by startProcess
// to WaitProcess: the wait message of the child, or the error returned by
// Await while waiting for it.
type waitErr struct {
	Waitmsg
	err error
}
   514  
// procs tracks the processes started by startProcess.
// The embedded Mutex guards waits.
var procs struct {
	sync.Mutex
	// waits maps a child pid to the channel on which its waitErr is
	// delivered. It is created lazily by startProcess; WaitProcess removes
	// an entry after receiving from its channel.
	waits map[int]chan *waitErr
}
   519  
   520  // startProcess starts a new goroutine, tied to the OS
   521  // thread, which runs the process and subsequently waits
   522  // for it to finish, communicating the process stats back
   523  // to any goroutines that may have been waiting on it.
   524  //
   525  // Such a dedicated goroutine is needed because on
   526  // Plan 9, only the parent thread can wait for a child,
   527  // whereas goroutines tend to jump OS threads (e.g.,
   528  // between starting a process and running Wait(), the
   529  // goroutine may have been rescheduled).
   530  func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   531  	type forkRet struct {
   532  		pid int
   533  		err error
   534  	}
   535  
   536  	forkc := make(chan forkRet, 1)
   537  	go func() {
   538  		runtime.LockOSThread()
   539  		var ret forkRet
   540  
   541  		ret.pid, ret.err = forkExec(argv0, argv, attr)
   542  		// If fork fails there is nothing to wait for.
   543  		if ret.err != nil || ret.pid == 0 {
   544  			forkc <- ret
   545  			return
   546  		}
   547  
   548  		waitc := make(chan *waitErr, 1)
   549  
   550  		// Mark that the process is running.
   551  		procs.Lock()
   552  		if procs.waits == nil {
   553  			procs.waits = make(map[int]chan *waitErr)
   554  		}
   555  		procs.waits[ret.pid] = waitc
   556  		procs.Unlock()
   557  
   558  		forkc <- ret
   559  
   560  		var w waitErr
   561  		for w.err == nil && w.Pid != ret.pid {
   562  			w.err = Await(&w.Waitmsg)
   563  		}
   564  		waitc <- &w
   565  		close(waitc)
   566  	}()
   567  	ret := <-forkc
   568  	return ret.pid, ret.err
   569  }
   570  
   571  // Combination of fork and exec, careful to be thread safe.
   572  func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   573  	return startProcess(argv0, argv, attr)
   574  }
   575  
   576  // StartProcess wraps ForkExec for package os.
   577  func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
   578  	pid, err = startProcess(argv0, argv, attr)
   579  	return pid, 0, err
   580  }
   581  
   582  // Ordinary exec.
   583  func Exec(argv0 string, argv []string, envv []string) (err error) {
   584  	if envv != nil {
   585  		r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0)
   586  		if int32(r1) == -1 {
   587  			return NewError(errstr())
   588  		}
   589  
   590  		for _, v := range envv {
   591  			i := 0
   592  			for i < len(v) && v[i] != '=' {
   593  				i++
   594  			}
   595  
   596  			fd, e := Create("/env/"+v[:i], O_WRONLY, 0666)
   597  			if e != nil {
   598  				return e
   599  			}
   600  
   601  			_, e = Write(fd, []byte(v[i+1:]))
   602  			if e != nil {
   603  				Close(fd)
   604  				return e
   605  			}
   606  			Close(fd)
   607  		}
   608  	}
   609  
   610  	argv0p, err := BytePtrFromString(argv0)
   611  	if err != nil {
   612  		return err
   613  	}
   614  	argvp, err := SlicePtrFromStrings(argv)
   615  	if err != nil {
   616  		return err
   617  	}
   618  	_, _, e1 := Syscall(SYS_EXEC,
   619  		uintptr(unsafe.Pointer(argv0p)),
   620  		uintptr(unsafe.Pointer(&argvp[0])),
   621  		0)
   622  
   623  	return e1
   624  }
   625  
   626  // WaitProcess waits until the pid of a
   627  // running process is found in the queue of
   628  // wait messages. It is used in conjunction
   629  // with ForkExec/StartProcess to wait for a
   630  // running process to exit.
   631  func WaitProcess(pid int, w *Waitmsg) (err error) {
   632  	procs.Lock()
   633  	ch := procs.waits[pid]
   634  	procs.Unlock()
   635  
   636  	var wmsg *waitErr
   637  	if ch != nil {
   638  		wmsg = <-ch
   639  		procs.Lock()
   640  		if procs.waits[pid] == ch {
   641  			delete(procs.waits, pid)
   642  		}
   643  		procs.Unlock()
   644  	}
   645  	if wmsg == nil {
   646  		// ch was missing or ch is closed
   647  		return NewError("process not found")
   648  	}
   649  	if wmsg.err != nil {
   650  		return wmsg.err
   651  	}
   652  	if w != nil {
   653  		*w = wmsg.Waitmsg
   654  	}
   655  	return nil
   656  }