github.com/fjballest/golang@v0.0.0-20151209143359-e4c5fe594ca8/src/syscall/exec_plan9.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Fork, exec, wait, etc.
     6  
     7  package syscall
     8  
     9  import (
    10  	"runtime"
    11  	"sync"
    12  	"unsafe"
    13  )
    14  
    15  // Lock synchronizing creation of new file descriptors with fork.
    16  //
    17  // We want the child in a fork/exec sequence to inherit only the
    18  // file descriptors we intend.  To do that, we mark all file
    19  // descriptors close-on-exec and then, in the child, explicitly
    20  // unmark the ones we want the exec'ed program to keep.
    21  // Unix doesn't make this easy: there is, in general, no way to
    22  // allocate a new file descriptor close-on-exec.  Instead you
    23  // have to allocate the descriptor and then mark it close-on-exec.
    24  // If a fork happens between those two events, the child's exec
    25  // will inherit an unwanted file descriptor.
    26  //
    27  // This lock solves that race: the create new fd/mark close-on-exec
    28  // operation is done holding ForkLock for reading, and the fork itself
    29  // is done holding ForkLock for writing.  At least, that's the idea.
    30  // There are some complications.
    31  //
    32  // Some system calls that create new file descriptors can block
    33  // for arbitrarily long times: open on a hung NFS server or named
    34  // pipe, accept on a socket, and so on.  We can't reasonably grab
    35  // the lock across those operations.
    36  //
    37  // It is worse to inherit some file descriptors than others.
    38  // If a non-malicious child accidentally inherits an open ordinary file,
    39  // that's not a big deal.  On the other hand, if a long-lived child
    40  // accidentally inherits the write end of a pipe, then the reader
    41  // of that pipe will not see EOF until that child exits, potentially
    42  // causing the parent program to hang.  This is a common problem
    43  // in threaded C programs that use popen.
    44  //
    45  // Luckily, the file descriptors that are most important not to
    46  // inherit are not the ones that can take an arbitrarily long time
    47  // to create: pipe returns instantly, and the net package uses
    48  // non-blocking I/O to accept on a listening socket.
    49  // The rules for which file descriptor-creating operations use the
    50  // ForkLock are as follows:
    51  //
    52  // 1) Pipe.    Does not block.  Use the ForkLock.
    53  // 2) Socket.  Does not block.  Use the ForkLock.
    54  // 3) Accept.  If using non-blocking mode, use the ForkLock.
    55  //             Otherwise, live with the race.
    56  // 4) Open.    Can block.  Use O_CLOEXEC if available (Linux).
    57  //             Otherwise, live with the race.
    58  // 5) Dup.     Does not block.  Use the ForkLock.
    59  //             On Linux, could use fcntl F_DUPFD_CLOEXEC
    60  //             instead of the ForkLock, but only for dup(fd, -1).
    61  
// ForkLock synchronizes the creation of new file descriptors with fork:
// it is meant to be held for reading while a descriptor is created and
// marked close-on-exec, and for writing around the fork itself.
var ForkLock sync.RWMutex
    63  
    64  // StringSlicePtr converts a slice of strings to a slice of pointers
    65  // to NUL-terminated byte arrays. If any string contains a NUL byte
    66  // this function panics instead of returning an error.
    67  //
    68  // Deprecated: Use SlicePtrFromStrings instead.
    69  func StringSlicePtr(ss []string) []*byte {
    70  	bb := make([]*byte, len(ss)+1)
    71  	for i := 0; i < len(ss); i++ {
    72  		bb[i] = StringBytePtr(ss[i])
    73  	}
    74  	bb[len(ss)] = nil
    75  	return bb
    76  }
    77  
    78  // SlicePtrFromStrings converts a slice of strings to a slice of
    79  // pointers to NUL-terminated byte arrays. If any string contains
    80  // a NUL byte, it returns (nil, EINVAL).
    81  func SlicePtrFromStrings(ss []string) ([]*byte, error) {
    82  	var err error
    83  	bb := make([]*byte, len(ss)+1)
    84  	for i := 0; i < len(ss); i++ {
    85  		bb[i], err = BytePtrFromString(ss[i])
    86  		if err != nil {
    87  			return nil, err
    88  		}
    89  	}
    90  	bb[len(ss)] = nil
    91  	return bb, nil
    92  }
    93  
    94  // readdirnames returns the names of files inside the directory represented by dirfd.
    95  func readdirnames(dirfd int) (names []string, err error) {
    96  	names = make([]string, 0, 100)
    97  	var buf [STATMAX]byte
    98  
    99  	for {
   100  		n, e := Read(dirfd, buf[:])
   101  		if e != nil {
   102  			return nil, e
   103  		}
   104  		if n == 0 {
   105  			break
   106  		}
   107  		for i := 0; i < n; {
   108  			m, _ := gbit16(buf[i:])
   109  			m += 2
   110  
   111  			if m < STATFIXLEN {
   112  				return nil, ErrBadStat
   113  			}
   114  
   115  			s, _, ok := gstring(buf[i+41:])
   116  			if !ok {
   117  				return nil, ErrBadStat
   118  			}
   119  			names = append(names, s)
   120  			i += int(m)
   121  		}
   122  	}
   123  	return
   124  }
   125  
   126  // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d.
   127  // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read.
   128  func readdupdevice() (fds []int, err error) {
   129  	dupdevfd, err := Open("#d", O_RDONLY)
   130  	if err != nil {
   131  		return
   132  	}
   133  	defer Close(dupdevfd)
   134  
   135  	names, err := readdirnames(dupdevfd)
   136  	if err != nil {
   137  		return
   138  	}
   139  
   140  	fds = make([]int, 0, len(names)/2)
   141  	for _, name := range names {
   142  		if n := len(name); n > 3 && name[n-3:n] == "ctl" {
   143  			continue
   144  		}
   145  		fd := int(atoi([]byte(name)))
   146  		switch fd {
   147  		case 0, 1, 2, dupdevfd:
   148  			continue
   149  		}
   150  		fds = append(fds, fd)
   151  	}
   152  	return
   153  }
   154  
// startupFds holds the file descriptors reported by readdupdevice when
// this package was initialized (stdin, stdout and stderr are never listed).
// forkExec leaves these descriptors out of the set it closes in the child.
var startupFds []int

// Plan 9 does not allow clearing the OCEXEC flag
// from the underlying channel backing an open file descriptor,
// therefore we store a list of already opened file descriptors
// inside startupFds and skip them when manually closing descriptors
// not meant to be passed to a child exec.
func init() {
	// The error is discarded: on failure startupFds simply keeps whatever
	// readdupdevice returned alongside the error.
	startupFds, _ = readdupdevice()
}
   165  
// forkAndExecInChild forks the process with rfork, dups the descriptors
// listed in attr.Files onto 0..len(attr.Files)-1 in the child, and finally
// invokes exec(argv0, argv) there.
// If a dup or exec fails, it writes the error string to pipe.
// (The pipe write end is close-on-exec so if exec succeeds, it will be closed.)
//
// Parameters:
//   - argv0, argv: program path and argument vector passed to exec.
//   - envv: when non-nil, RFCENVG is added to the rfork flags and the child
//     creates the file named by each item and writes the item's value to it.
//   - dir: when non-nil, the child changes to this directory.
//   - attr: only attr.Files is read here; an entry that converts to -1
//     makes the child close the descriptor at that index instead of dup'ing.
//   - fdsToClose: descriptors the child closes right after the fork.
//   - pipe: descriptor on which the child reports a failure.
//   - rflag: extra flags OR-ed into the rfork call.
//
// In the parent it returns the child's pid, or 0 and an error built from
// errstr when rfork reports -1. The child never returns: it either execs
// or reports the error on pipe and exits.
//
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork.  This means
// no rescheduling, no malloc calls, and no new stack segments.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
//go:norace
func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., errbuf).
	var (
		r1       uintptr
		nextfd   int
		i        int
		clearenv int
		envfd    int
		errbuf   [ERRMAX]byte
	)

	// Guard against side effects of shuffling fds below.
	// Make sure that nextfd is beyond any currently open files so
	// that we can't run the risk of overwriting any of them.
	// fd is a private int copy of attr.Files that the passes below rewrite.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// A non-nil envv asks for RFCENVG in the rfork flags; the variables
	// themselves are written by the child further down.
	if envv != nil {
		clearenv = RFCENVG
	}

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0)

	// A non-zero result means we are not the child: either rfork failed
	// (-1) or we are the parent holding the child's pid.
	if r1 != 0 {
		if int32(r1) == -1 {
			return 0, NewError(errstr())
		}
		// parent; return PID
		return int(r1), nil
	}

	// Fork succeeded, now in child.

	// Close fds we don't need.
	// Results are ignored; a failed close here is not reported.
	for i = 0; i < len(fdsToClose); i++ {
		RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0)
	}

	if envv != nil {
		// Write new environment variables.
		// Each variable is a file: create it, write the value, close it.
		// Any failure, including a short write, aborts via childerror.
		for i = 0; i < len(envv); i++ {
			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666))

			if int32(r1) == -1 {
				goto childerror
			}

			envfd = int(r1)

			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue),
				^uintptr(0), ^uintptr(0), 0)

			if int32(r1) == -1 || int(r1) != envv[i].nvalue {
				goto childerror
			}

			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0)

			if int32(r1) == -1 {
				goto childerror
			}
		}
	}

	// Chdir
	if dir != nil {
		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	// The error pipe gets the same treatment first, so that it is not
	// overwritten while descriptors are being rearranged.
	if pipe < nextfd {
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0)
		if int32(r1) == -1 {
			goto childerror
		}
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < int(i) {
			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0)
			if int32(r1) == -1 {
				goto childerror
			}

			fd[i] = nextfd
			nextfd++
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
		}
	}

	// Pass 2: dup fd[i] down onto i.
	// An entry of -1 means descriptor i should be closed in the child;
	// an entry already equal to i needs no work.
	for i = 0; i < len(fd); i++ {
		if fd[i] == -1 {
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == int(i) {
			continue
		}
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 3: close fd[i] if it was moved in the previous pass.
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] != int(i) {
			RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0)
		}
	}

	// Time to exec.
	// Control only continues past this call if exec failed, in which case
	// it falls through into childerror.
	r1, _, _ = RawSyscall(SYS_EXEC,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])), 0)

childerror:
	// send error string on pipe
	// Fetch the error string into errbuf, force NUL termination, and
	// measure it so only the text before the first NUL is written.
	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0)
	errbuf[len(errbuf)-1] = 0
	i = 0
	for i < len(errbuf) && errbuf[i] != 0 {
		i++
	}

	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i),
		^uintptr(0), ^uintptr(0), 0)

	// Keep asking to exit until the call takes effect.
	for {
		RawSyscall(SYS_EXITS, 0, 0, 0)
	}

	// Calling panic is not actually safe,
	// but the for loop above won't break
	// and this shuts up the compiler.
	panic("unreached")
}
   332  
   333  func cexecPipe(p []int) error {
   334  	e := Pipe(p)
   335  	if e != nil {
   336  		return e
   337  	}
   338  
   339  	fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC)
   340  	if e != nil {
   341  		Close(p[0])
   342  		Close(p[1])
   343  		return e
   344  	}
   345  
   346  	Close(fd)
   347  	return nil
   348  }
   349  
// envItem is one environment variable, prepared by forkExec before the
// fork so that forkAndExecInChild can write it without allocating.
type envItem struct {
	// name points to the path of the file to create ("/env/" + variable name).
	name   *byte
	// value points to the first byte of the data written to that file.
	value  *byte
	// nvalue is the number of bytes of value to write.
	nvalue int
}
   355  
// ProcAttr holds the attributes applied to a new process started by
// ForkExec or StartProcess.
type ProcAttr struct {
	Dir   string    // Current working directory.
	Env   []string  // Environment.
	Files []uintptr // File descriptors.
	// Sys carries the system-specific attributes; nil is treated as the
	// zero SysProcAttr.
	Sys   *SysProcAttr
}
   362  
// SysProcAttr holds the system-specific attributes of a new process.
type SysProcAttr struct {
	Rfork int // additional flags to pass to rfork
}
   366  
// zeroProcAttr and zeroSysProcAttr are the defaults forkExec substitutes
// when the caller passes a nil *ProcAttr or leaves ProcAttr.Sys nil.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
   369  
   370  func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   371  	var (
   372  		p      [2]int
   373  		n      int
   374  		errbuf [ERRMAX]byte
   375  		wmsg   Waitmsg
   376  	)
   377  
   378  	if attr == nil {
   379  		attr = &zeroProcAttr
   380  	}
   381  	sys := attr.Sys
   382  	if sys == nil {
   383  		sys = &zeroSysProcAttr
   384  	}
   385  
   386  	p[0] = -1
   387  	p[1] = -1
   388  
   389  	// Convert args to C form.
   390  	argv0p, err := BytePtrFromString(argv0)
   391  	if err != nil {
   392  		return 0, err
   393  	}
   394  	argvp, err := SlicePtrFromStrings(argv)
   395  	if err != nil {
   396  		return 0, err
   397  	}
   398  
   399  	destDir := attr.Dir
   400  	if destDir == "" {
   401  		wdmu.Lock()
   402  		destDir = wdStr
   403  		wdmu.Unlock()
   404  	}
   405  	var dir *byte
   406  	if destDir != "" {
   407  		dir, err = BytePtrFromString(destDir)
   408  		if err != nil {
   409  			return 0, err
   410  		}
   411  	}
   412  	var envvParsed []envItem
   413  	if attr.Env != nil {
   414  		envvParsed = make([]envItem, 0, len(attr.Env))
   415  		for _, v := range attr.Env {
   416  			i := 0
   417  			for i < len(v) && v[i] != '=' {
   418  				i++
   419  			}
   420  
   421  			envname, err := BytePtrFromString("/env/" + v[:i])
   422  			if err != nil {
   423  				return 0, err
   424  			}
   425  			envvalue := make([]byte, len(v)-i)
   426  			copy(envvalue, v[i+1:])
   427  			envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i})
   428  		}
   429  	}
   430  
   431  	// Acquire the fork lock to prevent other threads from creating new fds before we fork.
   432  	ForkLock.Lock()
   433  
   434  	// get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child.
   435  	// no new fds can be created while we hold the ForkLock for writing.
   436  	openFds, e := readdupdevice()
   437  	if e != nil {
   438  		ForkLock.Unlock()
   439  		return 0, e
   440  	}
   441  
   442  	fdsToClose := make([]int, 0, len(openFds))
   443  	for _, fd := range openFds {
   444  		doClose := true
   445  
   446  		// exclude files opened at startup.
   447  		for _, sfd := range startupFds {
   448  			if fd == sfd {
   449  				doClose = false
   450  				break
   451  			}
   452  		}
   453  
   454  		// exclude files explicitly requested by the caller.
   455  		for _, rfd := range attr.Files {
   456  			if fd == int(rfd) {
   457  				doClose = false
   458  				break
   459  			}
   460  		}
   461  
   462  		if doClose {
   463  			fdsToClose = append(fdsToClose, fd)
   464  		}
   465  	}
   466  
   467  	// Allocate child status pipe close on exec.
   468  	e = cexecPipe(p[:])
   469  
   470  	if e != nil {
   471  		return 0, e
   472  	}
   473  	fdsToClose = append(fdsToClose, p[0])
   474  
   475  	// Kick off child.
   476  	pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork)
   477  
   478  	if err != nil {
   479  		if p[0] >= 0 {
   480  			Close(p[0])
   481  			Close(p[1])
   482  		}
   483  		ForkLock.Unlock()
   484  		return 0, err
   485  	}
   486  	ForkLock.Unlock()
   487  
   488  	// Read child error status from pipe.
   489  	Close(p[1])
   490  	n, err = Read(p[0], errbuf[:])
   491  	Close(p[0])
   492  
   493  	if err != nil || n != 0 {
   494  		if n != 0 {
   495  			err = NewError(string(errbuf[:n]))
   496  		}
   497  
   498  		// Child failed; wait for it to exit, to make sure
   499  		// the zombies don't accumulate.
   500  		for wmsg.Pid != pid {
   501  			Await(&wmsg)
   502  		}
   503  		return 0, err
   504  	}
   505  
   506  	// Read got EOF, so pipe closed on exec, so exec succeeded.
   507  	return pid, nil
   508  }
   509  
// waitErr is the result delivered to WaitProcess for one child: the wait
// message filled in by Await, plus the error Await returned, if any.
type waitErr struct {
	Waitmsg
	err error
}
   514  
// procs tracks the processes started by startProcess. waits maps a child's
// pid to the channel on which its waitErr is delivered; the map is created
// lazily and every access is guarded by the embedded mutex.
var procs struct {
	sync.Mutex
	waits map[int]chan *waitErr
}
   519  
   520  // startProcess starts a new goroutine, tied to the OS
   521  // thread, which runs the process and subsequently waits
   522  // for it to finish, communicating the process stats back
   523  // to any goroutines that may have been waiting on it.
   524  //
   525  // Such a dedicated goroutine is needed because on
   526  // Plan 9, only the parent thread can wait for a child,
   527  // whereas goroutines tend to jump OS threads (e.g.,
   528  // between starting a process and running Wait(), the
   529  // goroutine may have been rescheduled).
   530  func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
   531  	type forkRet struct {
   532  		pid int
   533  		err error
   534  	}
   535  
   536  	forkc := make(chan forkRet, 1)
   537  	go func() {
   538  		runtime.LockOSThread()
   539  		var ret forkRet
   540  
   541  		ret.pid, ret.err = forkExec(argv0, argv, attr)
   542  		// If fork fails there is nothing to wait for.
   543  		if ret.err != nil || ret.pid == 0 {
   544  			forkc <- ret
   545  			return
   546  		}
   547  
   548  		waitc := make(chan *waitErr, 1)
   549  
   550  		// Mark that the process is running.
   551  		procs.Lock()
   552  		if procs.waits == nil {
   553  			procs.waits = make(map[int]chan *waitErr)
   554  		}
   555  		procs.waits[ret.pid] = waitc
   556  		procs.Unlock()
   557  
   558  		forkc <- ret
   559  
   560  		var w waitErr
   561  		for w.err == nil && w.Pid != ret.pid {
   562  			w.err = Await(&w.Waitmsg)
   563  		}
   564  		waitc <- &w
   565  		close(waitc)
   566  	}()
   567  	ret := <-forkc
   568  	return ret.pid, ret.err
   569  }
   570  
// ForkExec is a combination of fork and exec, careful to be thread safe.
// It starts argv0 with arguments argv and attributes attr through
// startProcess and returns the new process's pid, or an error.
func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
	return startProcess(argv0, argv, attr)
}
   575  
// StartProcess wraps ForkExec for package os.
// It starts the process through startProcess; the returned handle is
// always 0.
func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
	pid, err = startProcess(argv0, argv, attr)
	return pid, 0, err
}
   581  
   582  // Ordinary exec.
   583  func Exec(argv0 string, argv []string, envv []string) (err error) {
   584  	if envv != nil {
   585  		r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0)
   586  		if int32(r1) == -1 {
   587  			return NewError(errstr())
   588  		}
   589  
   590  		for _, v := range envv {
   591  			i := 0
   592  			for i < len(v) && v[i] != '=' {
   593  				i++
   594  			}
   595  
   596  			fd, e := Create("/env/"+v[:i], O_WRONLY, 0666)
   597  			if e != nil {
   598  				return e
   599  			}
   600  
   601  			_, e = Write(fd, []byte(v[i+1:]))
   602  			if e != nil {
   603  				Close(fd)
   604  				return e
   605  			}
   606  			Close(fd)
   607  		}
   608  	}
   609  
   610  	argv0p, err := BytePtrFromString(argv0)
   611  	if err != nil {
   612  		return err
   613  	}
   614  	argvp, err := SlicePtrFromStrings(argv)
   615  	if err != nil {
   616  		return err
   617  	}
   618  	_, _, e1 := Syscall(SYS_EXEC,
   619  		uintptr(unsafe.Pointer(argv0p)),
   620  		uintptr(unsafe.Pointer(&argvp[0])),
   621  		0)
   622  
   623  	return e1
   624  }
   625  
   626  // WaitProcess waits until the pid of a
   627  // running process is found in the queue of
   628  // wait messages. It is used in conjunction
   629  // with ForkExec/StartProcess to wait for a
   630  // running process to exit.
   631  func WaitProcess(pid int, w *Waitmsg) (err error) {
   632  	procs.Lock()
   633  	ch := procs.waits[pid]
   634  	procs.Unlock()
   635  
   636  	var wmsg *waitErr
   637  	if ch != nil {
   638  		wmsg = <-ch
   639  		procs.Lock()
   640  		if procs.waits[pid] == ch {
   641  			delete(procs.waits, pid)
   642  		}
   643  		procs.Unlock()
   644  	}
   645  	if wmsg == nil {
   646  		// ch was missing or ch is closed
   647  		return NewError("process not found")
   648  	}
   649  	if wmsg.err != nil {
   650  		return wmsg.err
   651  	}
   652  	if w != nil {
   653  		*w = wmsg.Waitmsg
   654  	}
   655  	return nil
   656  }