github.com/tcnksm/go@v0.0.0-20141208075154-439b32936367/src/syscall/exec_plan9.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Fork, exec, wait, etc.
     6  
     7  package syscall
     8  
     9  import (
    10  	"runtime"
    11  	"sync"
    12  	"unsafe"
    13  )
    14  
    15  // Lock synchronizing creation of new file descriptors with fork.
    16  //
    17  // We want the child in a fork/exec sequence to inherit only the
    18  // file descriptors we intend.  To do that, we mark all file
    19  // descriptors close-on-exec and then, in the child, explicitly
    20  // unmark the ones we want the exec'ed program to keep.
    21  // Unix doesn't make this easy: there is, in general, no way to
    22  // allocate a new file descriptor close-on-exec.  Instead you
    23  // have to allocate the descriptor and then mark it close-on-exec.
    24  // If a fork happens between those two events, the child's exec
    25  // will inherit an unwanted file descriptor.
    26  //
    27  // This lock solves that race: the create new fd/mark close-on-exec
    28  // operation is done holding ForkLock for reading, and the fork itself
    29  // is done holding ForkLock for writing.  At least, that's the idea.
    30  // There are some complications.
    31  //
    32  // Some system calls that create new file descriptors can block
    33  // for arbitrarily long times: open on a hung NFS server or named
    34  // pipe, accept on a socket, and so on.  We can't reasonably grab
    35  // the lock across those operations.
    36  //
    37  // It is worse to inherit some file descriptors than others.
    38  // If a non-malicious child accidentally inherits an open ordinary file,
    39  // that's not a big deal.  On the other hand, if a long-lived child
    40  // accidentally inherits the write end of a pipe, then the reader
    41  // of that pipe will not see EOF until that child exits, potentially
    42  // causing the parent program to hang.  This is a common problem
    43  // in threaded C programs that use popen.
    44  //
    45  // Luckily, the file descriptors that are most important not to
    46  // inherit are not the ones that can take an arbitrarily long time
    47  // to create: pipe returns instantly, and the net package uses
    48  // non-blocking I/O to accept on a listening socket.
    49  // The rules for which file descriptor-creating operations use the
    50  // ForkLock are as follows:
    51  //
    52  // 1) Pipe.    Does not block.  Use the ForkLock.
    53  // 2) Socket.  Does not block.  Use the ForkLock.
    54  // 3) Accept.  If using non-blocking mode, use the ForkLock.
    55  //             Otherwise, live with the race.
    56  // 4) Open.    Can block.  Use O_CLOEXEC if available (Linux).
    57  //             Otherwise, live with the race.
    58  // 5) Dup.     Does not block.  Use the ForkLock.
    59  //             On Linux, could use fcntl F_DUPFD_CLOEXEC
    60  //             instead of the ForkLock, but only for dup(fd, -1).
    61  
// ForkLock serializes process creation against file descriptor creation.
// In this file forkExec holds it for writing from the moment it snapshots
// the list of open descriptors until the fork has been issued.
var ForkLock sync.RWMutex
    63  
    64  // StringSlicePtr is deprecated. Use SlicePtrFromStrings instead.
    65  // If any string contains a NUL byte this function panics instead
    66  // of returning an error.
    67  func StringSlicePtr(ss []string) []*byte {
    68  	bb := make([]*byte, len(ss)+1)
    69  	for i := 0; i < len(ss); i++ {
    70  		bb[i] = StringBytePtr(ss[i])
    71  	}
    72  	bb[len(ss)] = nil
    73  	return bb
    74  }
    75  
    76  // SlicePtrFromStrings converts a slice of strings to a slice of
    77  // pointers to NUL-terminated byte slices. If any string contains
    78  // a NUL byte, it returns (nil, EINVAL).
    79  func SlicePtrFromStrings(ss []string) ([]*byte, error) {
    80  	var err error
    81  	bb := make([]*byte, len(ss)+1)
    82  	for i := 0; i < len(ss); i++ {
    83  		bb[i], err = BytePtrFromString(ss[i])
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  	}
    88  	bb[len(ss)] = nil
    89  	return bb, nil
    90  }
    91  
    92  // readdirnames returns the names of files inside the directory represented by dirfd.
    93  func readdirnames(dirfd int) (names []string, err error) {
    94  	names = make([]string, 0, 100)
    95  	var buf [STATMAX]byte
    96  
    97  	for {
    98  		n, e := Read(dirfd, buf[:])
    99  		if e != nil {
   100  			return nil, e
   101  		}
   102  		if n == 0 {
   103  			break
   104  		}
   105  		for i := 0; i < n; {
   106  			m, _ := gbit16(buf[i:])
   107  			m += 2
   108  
   109  			if m < STATFIXLEN {
   110  				return nil, ErrBadStat
   111  			}
   112  
   113  			s, _, ok := gstring(buf[i+41:])
   114  			if !ok {
   115  				return nil, ErrBadStat
   116  			}
   117  			names = append(names, s)
   118  			i += int(m)
   119  		}
   120  	}
   121  	return
   122  }
   123  
   124  // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d.
   125  // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read.
   126  func readdupdevice() (fds []int, err error) {
   127  	dupdevfd, err := Open("#d", O_RDONLY)
   128  	if err != nil {
   129  		return
   130  	}
   131  	defer Close(dupdevfd)
   132  
   133  	names, err := readdirnames(dupdevfd)
   134  	if err != nil {
   135  		return
   136  	}
   137  
   138  	fds = make([]int, 0, len(names)/2)
   139  	for _, name := range names {
   140  		if n := len(name); n > 3 && name[n-3:n] == "ctl" {
   141  			continue
   142  		}
   143  		fd := int(atoi([]byte(name)))
   144  		switch fd {
   145  		case 0, 1, 2, dupdevfd:
   146  			continue
   147  		}
   148  		fds = append(fds, fd)
   149  	}
   150  	return
   151  }
   152  
   153  var startupFds []int
   154  
   155  // Plan 9 does not allow clearing the OCEXEC flag
   156  // from the underlying channel backing an open file descriptor,
   157  // therefore we store a list of already opened file descriptors
   158  // inside startupFds and skip them when manually closing descriptors
   159  // not meant to be passed to a child exec.
   160  func init() {
   161  	startupFds, _ = readdupdevice()
   162  }
   163  
// forkAndExecInChild forks the process, calling dup onto 0..len(fd)
// and finally invoking exec(argv0, argvv, envv) in the child.
// If a dup or exec fails, it writes the error string to pipe.
// (The pipe write end is close-on-exec so if exec succeeds, it will be closed.)
//
// Parameters:
//   argv0      - NUL-terminated path of the program to exec.
//   argv       - nil-terminated argument vector; &argv[0] is handed to exec.
//   envv       - if non-nil, the child's environment is replaced by these
//                items, each written to the file named by its name field.
//   dir        - if non-nil, directory the child changes to before exec.
//   attr       - supplies attr.Files: attr.Files[i] becomes descriptor i in
//                the child; an entry that converts to -1 closes descriptor i.
//   fdsToClose - descriptors closed in the child before anything else.
//   pipe       - descriptor on which the child reports a failure.
//   rflag      - extra flags OR'ed into the rfork call.
//
// In the parent it returns the child's pid, or an error if the fork itself
// failed. In the child it never returns: it either execs or exits.
//
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork.  This means
// no rescheduling, no malloc calls, and no new stack segments.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., errbuf).
	var (
		r1       uintptr
		nextfd   int
		i        int
		clearenv int
		envfd    int
		errbuf   [ERRMAX]byte
	)

	// Guard against side effects of shuffling fds below.
	// Make sure that nextfd is beyond any currently open files so
	// that we can't run the risk of overwriting any of them.
	// fd is a private int copy of attr.Files that the passes below may rewrite.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// A non-nil envv means the child gets a fresh environment: add
	// RFCENVG to the rfork flags and write the new variables after the fork.
	if envv != nil {
		clearenv = RFCENVG
	}

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0)

	// r1 is non-zero in the parent: -1 on failure, otherwise the child's pid.
	if r1 != 0 {
		if int32(r1) == -1 {
			return 0, NewError(errstr())
		}
		// parent; return PID
		return int(r1), nil
	}

	// Fork succeeded, now in child.

	// Close fds we don't need.
	for i = 0; i < len(fdsToClose); i++ {
		r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	if envv != nil {
		// Write new environment variables.
		// Each variable is a file: create it, write nvalue bytes, close it.
		for i = 0; i < len(envv); i++ {
			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666))

			if int32(r1) == -1 {
				goto childerror
			}

			envfd = int(r1)

			// The offset argument is all ones (-1).
			// NOTE(review): presumably "use the current file offset" as
			// described for pwrite in Plan 9 pread(2) — confirm.
			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue),
				^uintptr(0), ^uintptr(0), 0)

			// A short write is treated as a failure too.
			if int32(r1) == -1 || int(r1) != envv[i].nvalue {
				goto childerror
			}

			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0)

			if int32(r1) == -1 {
				goto childerror
			}
		}
	}

	// Chdir
	if dir != nil {
		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	// The error pipe is moved first if it sits in the range pass 2 may overwrite.
	if pipe < nextfd {
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0)
		if int32(r1) == -1 {
			goto childerror
		}
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < int(i) {
			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0)
			if int32(r1) == -1 {
				goto childerror
			}

			fd[i] = nextfd
			nextfd++
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
		}
	}

	// Pass 2: dup fd[i] down onto i.
	for i = 0; i < len(fd); i++ {
		// -1 means the caller wants slot i closed; the result is ignored.
		if fd[i] == -1 {
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		// Already in the right slot.
		if fd[i] == int(i) {
			continue
		}
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 3: close fd[i] if it was moved in the previous pass.
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] != int(i) {
			RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0)
		}
	}

	// Time to exec.
	// Control only continues past this call if exec failed; it then
	// falls through into the error path below.
	r1, _, _ = RawSyscall(SYS_EXEC,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])), 0)

childerror:
	// send error string on pipe
	// Fetch the error string into errbuf, force NUL termination, and
	// measure it by hand: no helper functions may be called here.
	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0)
	errbuf[len(errbuf)-1] = 0
	i = 0
	for i < len(errbuf) && errbuf[i] != 0 {
		i++
	}

	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i),
		^uintptr(0), ^uintptr(0), 0)

	// Keep asking to exit; the child must never return into Go code.
	for {
		RawSyscall(SYS_EXITS, 0, 0, 0)
	}

	// Calling panic is not actually safe,
	// but the for loop above won't break
	// and this shuts up the compiler.
	panic("unreached")
}
   332  
   333  func cexecPipe(p []int) error {
   334  	e := Pipe(p)
   335  	if e != nil {
   336  		return e
   337  	}
   338  
   339  	fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC)
   340  	if e != nil {
   341  		Close(p[0])
   342  		Close(p[1])
   343  		return e
   344  	}
   345  
   346  	Close(fd)
   347  	return nil
   348  }
   349  
// envItem is one environment variable prepared by forkExec before the fork
// so that the child can write it without allocating.
type envItem struct {
	name   *byte // NUL-terminated path of the variable's file ("/env/" + name)
	value  *byte // first byte of the buffer holding the value
	nvalue int   // number of bytes of the buffer written to the file
}
   355  
// ProcAttr holds the attributes applied to a new process started by
// ForkExec or StartProcess.
type ProcAttr struct {
	Dir   string    // Current working directory; empty means no chdir is performed.
	Env   []string  // Environment as "name=value" entries; nil leaves the environment untouched.
	Files []uintptr // File descriptors; Files[i] becomes descriptor i in the child.
	Sys   *SysProcAttr // Optional system-specific attributes; nil means the zero value.
}
   362  
// SysProcAttr holds the Plan 9 specific process attributes.
type SysProcAttr struct {
	Rfork int // additional flags to pass to rfork
}
   366  
// zeroProcAttr and zeroSysProcAttr are the defaults forkExec substitutes
// when the caller passes a nil *ProcAttr or leaves ProcAttr.Sys nil.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
   369  
   370  func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   371  	var (
   372  		p      [2]int
   373  		n      int
   374  		errbuf [ERRMAX]byte
   375  		wmsg   Waitmsg
   376  	)
   377  
   378  	if attr == nil {
   379  		attr = &zeroProcAttr
   380  	}
   381  	sys := attr.Sys
   382  	if sys == nil {
   383  		sys = &zeroSysProcAttr
   384  	}
   385  
   386  	p[0] = -1
   387  	p[1] = -1
   388  
   389  	// Convert args to C form.
   390  	argv0p, err := BytePtrFromString(argv0)
   391  	if err != nil {
   392  		return 0, err
   393  	}
   394  	argvp, err := SlicePtrFromStrings(argv)
   395  	if err != nil {
   396  		return 0, err
   397  	}
   398  
   399  	var dir *byte
   400  	if attr.Dir != "" {
   401  		dir, err = BytePtrFromString(attr.Dir)
   402  		if err != nil {
   403  			return 0, err
   404  		}
   405  	}
   406  	var envvParsed []envItem
   407  	if attr.Env != nil {
   408  		envvParsed = make([]envItem, 0, len(attr.Env))
   409  		for _, v := range attr.Env {
   410  			i := 0
   411  			for i < len(v) && v[i] != '=' {
   412  				i++
   413  			}
   414  
   415  			envname, err := BytePtrFromString("/env/" + v[:i])
   416  			if err != nil {
   417  				return 0, err
   418  			}
   419  			envvalue := make([]byte, len(v)-i)
   420  			copy(envvalue, v[i+1:])
   421  			envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i})
   422  		}
   423  	}
   424  
   425  	// Acquire the fork lock to prevent other threads from creating new fds before we fork.
   426  	ForkLock.Lock()
   427  
   428  	// get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child.
   429  	// no new fds can be created while we hold the ForkLock for writing.
   430  	openFds, e := readdupdevice()
   431  	if e != nil {
   432  		ForkLock.Unlock()
   433  		return 0, e
   434  	}
   435  
   436  	fdsToClose := make([]int, 0, len(openFds))
   437  	for _, fd := range openFds {
   438  		doClose := true
   439  
   440  		// exclude files opened at startup.
   441  		for _, sfd := range startupFds {
   442  			if fd == sfd {
   443  				doClose = false
   444  				break
   445  			}
   446  		}
   447  
   448  		// exclude files explicitly requested by the caller.
   449  		for _, rfd := range attr.Files {
   450  			if fd == int(rfd) {
   451  				doClose = false
   452  				break
   453  			}
   454  		}
   455  
   456  		if doClose {
   457  			fdsToClose = append(fdsToClose, fd)
   458  		}
   459  	}
   460  
   461  	// Allocate child status pipe close on exec.
   462  	e = cexecPipe(p[:])
   463  
   464  	if e != nil {
   465  		return 0, e
   466  	}
   467  	fdsToClose = append(fdsToClose, p[0])
   468  
   469  	// Kick off child.
   470  	pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork)
   471  
   472  	if err != nil {
   473  		if p[0] >= 0 {
   474  			Close(p[0])
   475  			Close(p[1])
   476  		}
   477  		ForkLock.Unlock()
   478  		return 0, err
   479  	}
   480  	ForkLock.Unlock()
   481  
   482  	// Read child error status from pipe.
   483  	Close(p[1])
   484  	n, err = Read(p[0], errbuf[:])
   485  	Close(p[0])
   486  
   487  	if err != nil || n != 0 {
   488  		if n != 0 {
   489  			err = NewError(string(errbuf[:n]))
   490  		}
   491  
   492  		// Child failed; wait for it to exit, to make sure
   493  		// the zombies don't accumulate.
   494  		for wmsg.Pid != pid {
   495  			Await(&wmsg)
   496  		}
   497  		return 0, err
   498  	}
   499  
   500  	// Read got EOF, so pipe closed on exec, so exec succeeded.
   501  	return pid, nil
   502  }
   503  
// waitErr is the result delivered to WaitProcess for one child: the wait
// message collected by Await together with the error Await returned, if any.
type waitErr struct {
	Waitmsg
	err error
}
   508  
// procs tracks the processes started by startProcess. waits maps a child's
// pid to the channel on which its waitErr is delivered; the embedded mutex
// guards the map, which is created lazily by startProcess.
var procs struct {
	sync.Mutex
	waits map[int]chan *waitErr
}
   513  
   514  // startProcess starts a new goroutine, tied to the OS
   515  // thread, which runs the process and subsequently waits
   516  // for it to finish, communicating the process stats back
   517  // to any goroutines that may have been waiting on it.
   518  //
   519  // Such a dedicated goroutine is needed because on
   520  // Plan 9, only the parent thread can wait for a child,
   521  // whereas goroutines tend to jump OS threads (e.g.,
   522  // between starting a process and running Wait(), the
   523  // goroutine may have been rescheduled).
   524  func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   525  	type forkRet struct {
   526  		pid int
   527  		err error
   528  	}
   529  
   530  	forkc := make(chan forkRet, 1)
   531  	go func() {
   532  		runtime.LockOSThread()
   533  		var ret forkRet
   534  
   535  		ret.pid, ret.err = forkExec(argv0, argv, attr)
   536  		// If fork fails there is nothing to wait for.
   537  		if ret.err != nil || ret.pid == 0 {
   538  			forkc <- ret
   539  			return
   540  		}
   541  
   542  		waitc := make(chan *waitErr, 1)
   543  
   544  		// Mark that the process is running.
   545  		procs.Lock()
   546  		if procs.waits == nil {
   547  			procs.waits = make(map[int]chan *waitErr)
   548  		}
   549  		procs.waits[ret.pid] = waitc
   550  		procs.Unlock()
   551  
   552  		forkc <- ret
   553  
   554  		var w waitErr
   555  		for w.err == nil && w.Pid != ret.pid {
   556  			w.err = Await(&w.Waitmsg)
   557  		}
   558  		waitc <- &w
   559  		close(waitc)
   560  	}()
   561  	ret := <-forkc
   562  	return ret.pid, ret.err
   563  }
   564  
   565  // Combination of fork and exec, careful to be thread safe.
   566  func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   567  	return startProcess(argv0, argv, attr)
   568  }
   569  
   570  // StartProcess wraps ForkExec for package os.
   571  func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
   572  	pid, err = startProcess(argv0, argv, attr)
   573  	return pid, 0, err
   574  }
   575  
   576  // Ordinary exec.
   577  func Exec(argv0 string, argv []string, envv []string) (err error) {
   578  	if envv != nil {
   579  		r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0)
   580  		if int32(r1) == -1 {
   581  			return NewError(errstr())
   582  		}
   583  
   584  		for _, v := range envv {
   585  			i := 0
   586  			for i < len(v) && v[i] != '=' {
   587  				i++
   588  			}
   589  
   590  			fd, e := Create("/env/"+v[:i], O_WRONLY, 0666)
   591  			if e != nil {
   592  				return e
   593  			}
   594  
   595  			_, e = Write(fd, []byte(v[i+1:]))
   596  			if e != nil {
   597  				Close(fd)
   598  				return e
   599  			}
   600  			Close(fd)
   601  		}
   602  	}
   603  
   604  	argv0p, err := BytePtrFromString(argv0)
   605  	if err != nil {
   606  		return err
   607  	}
   608  	argvp, err := SlicePtrFromStrings(argv)
   609  	if err != nil {
   610  		return err
   611  	}
   612  	_, _, e1 := Syscall(SYS_EXEC,
   613  		uintptr(unsafe.Pointer(argv0p)),
   614  		uintptr(unsafe.Pointer(&argvp[0])),
   615  		0)
   616  
   617  	return e1
   618  }
   619  
   620  // WaitProcess waits until the pid of a
   621  // running process is found in the queue of
   622  // wait messages. It is used in conjunction
   623  // with ForkExec/StartProcess to wait for a
   624  // running process to exit.
   625  func WaitProcess(pid int, w *Waitmsg) (err error) {
   626  	procs.Lock()
   627  	ch := procs.waits[pid]
   628  	procs.Unlock()
   629  
   630  	var wmsg *waitErr
   631  	if ch != nil {
   632  		wmsg = <-ch
   633  		procs.Lock()
   634  		if procs.waits[pid] == ch {
   635  			delete(procs.waits, pid)
   636  		}
   637  		procs.Unlock()
   638  	}
   639  	if wmsg == nil {
   640  		// ch was missing or ch is closed
   641  		return NewError("process not found")
   642  	}
   643  	if wmsg.err != nil {
   644  		return wmsg.err
   645  	}
   646  	if w != nil {
   647  		*w = wmsg.Waitmsg
   648  	}
   649  	return nil
   650  }