github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/ptrace/subprocess.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ptrace
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"runtime"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    24  	"github.com/SagerNet/gvisor/pkg/hostarch"
    25  	"github.com/SagerNet/gvisor/pkg/log"
    26  	"github.com/SagerNet/gvisor/pkg/procid"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/platform"
    30  	"github.com/SagerNet/gvisor/pkg/sync"
    31  )
    32  
// Linux kernel errnos which "should never be seen by user programs", but will
// be revealed to ptrace syscall exit tracing.
//
// syscallIgnoreInterrupt treats each of these values as a request to re-issue
// the injected system call.
//
// These constants are only used in subprocess.go.
const (
	ERESTARTSYS    = unix.Errno(512)
	ERESTARTNOINTR = unix.Errno(513)
	ERESTARTNOHAND = unix.Errno(514)
)
    42  
// globalPool exists to solve two distinct problems:
//
// 1) Subprocesses can't always be killed properly (see Release).
//
// 2) Any seccomp filters that have been installed will apply to subprocesses
// created here. Therefore we use the intermediary (master), which is created
// on initialization of the platform.
var globalPool struct {
	// mu is held while available is read or modified (see newSubprocess
	// and Release).
	mu        sync.Mutex
	// master is the intermediary subprocess described in (2) above.
	// NOTE(review): it is not assigned anywhere in this file; presumably it
	// is set during platform initialization -- confirm.
	master    *subprocess
	// available holds released subprocesses awaiting reuse. Release appends
	// to it and newSubprocess pops from the end.
	available []*subprocess
}
    55  
// thread is a traced thread; it is a thread identifier.
//
// This is a convenience type for defining ptrace operations.
type thread struct {
	// tgid and tid together address the thread when signalling it with
	// tgkill; tid alone is the target of the ptrace and wait4 calls.
	tgid int32
	tid  int32
	// NOTE(review): cpu is not read or written in this file; its meaning
	// should be confirmed against the code that populates it.
	cpu  uint32

	// initRegs are the initial registers for the first thread.
	//
	// These are used for the register set for system calls.
	initRegs arch.Registers
}
    69  
// threadPool is a collection of threads.
//
// Entries are added and re-keyed by lookupOrCreate.
type threadPool struct {
	// mu protects below.
	mu sync.RWMutex

	// threads is the collection of threads.
	//
	// This map is indexed by system TID (the calling thread); which will
	// be the tracer for the given *thread, and therefore capable of using
	// relevant ptrace calls.
	threads map[int32]*thread
}
    82  
    83  // lookupOrCreate looks up a given thread or creates one.
    84  //
    85  // newThread will generally be subprocess.newThread.
    86  //
    87  // Precondition: the runtime OS thread must be locked.
    88  func (tp *threadPool) lookupOrCreate(currentTID int32, newThread func() *thread) *thread {
    89  	// The overwhelming common case is that the thread is already created.
    90  	// Optimistically attempt the lookup by only locking for reading.
    91  	tp.mu.RLock()
    92  	t, ok := tp.threads[currentTID]
    93  	tp.mu.RUnlock()
    94  	if ok {
    95  		return t
    96  	}
    97  
    98  	tp.mu.Lock()
    99  	defer tp.mu.Unlock()
   100  
   101  	// Another goroutine might have created the thread for currentTID in between
   102  	// mu.RUnlock() and mu.Lock().
   103  	if t, ok = tp.threads[currentTID]; ok {
   104  		return t
   105  	}
   106  
   107  	// Before creating a new thread, see if we can find a thread
   108  	// whose system tid has disappeared.
   109  	//
   110  	// TODO(b/77216482): Other parts of this package depend on
   111  	// threads never exiting.
   112  	for origTID, t := range tp.threads {
   113  		// Signal zero is an easy existence check.
   114  		if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil {
   115  			// This thread has been abandoned; reuse it.
   116  			delete(tp.threads, origTID)
   117  			tp.threads[currentTID] = t
   118  			return t
   119  		}
   120  	}
   121  
   122  	// Create a new thread.
   123  	t = newThread()
   124  	tp.threads[currentTID] = t
   125  	return t
   126  }
   127  
// subprocess is a collection of threads being traced.
type subprocess struct {
	platform.NoAddressSpaceIO

	// requests is used to signal creation of new threads.
	//
	// Each request carries a reply channel; the goroutine started by
	// newSubprocess answers on it with the freshly cloned thread.
	requests chan chan *thread

	// sysemuThreads are reserved for emulation.
	sysemuThreads threadPool

	// syscallThreads are reserved for syscalls (except clone, which is
	// handled in the dedicated goroutine corresponding to requests above).
	syscallThreads threadPool

	// mu protects the following fields.
	mu sync.Mutex

	// contexts is the set of contexts for which it's possible that
	// context.lastFaultSP == this subprocess.
	contexts map[*context]struct{}
}
   149  
   150  // newSubprocess returns a usable subprocess.
   151  //
   152  // This will either be a newly created subprocess, or one from the global pool.
   153  // The create function will be called in the latter case, which is guaranteed
   154  // to happen with the runtime thread locked.
   155  func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
   156  	// See Release.
   157  	globalPool.mu.Lock()
   158  	if len(globalPool.available) > 0 {
   159  		sp := globalPool.available[len(globalPool.available)-1]
   160  		globalPool.available = globalPool.available[:len(globalPool.available)-1]
   161  		globalPool.mu.Unlock()
   162  		return sp, nil
   163  	}
   164  	globalPool.mu.Unlock()
   165  
   166  	// The following goroutine is responsible for creating the first traced
   167  	// thread, and responding to requests to make additional threads in the
   168  	// traced process. The process will be killed and reaped when the
   169  	// request channel is closed, which happens in Release below.
   170  	errChan := make(chan error)
   171  	requests := make(chan chan *thread)
   172  	go func() { // S/R-SAFE: Platform-related.
   173  		runtime.LockOSThread()
   174  		defer runtime.UnlockOSThread()
   175  
   176  		// Initialize the first thread.
   177  		firstThread, err := create()
   178  		if err != nil {
   179  			errChan <- err
   180  			return
   181  		}
   182  		firstThread.grabInitRegs()
   183  
   184  		// Ready to handle requests.
   185  		errChan <- nil
   186  
   187  		// Wait for requests to create threads.
   188  		for r := range requests {
   189  			t, err := firstThread.clone()
   190  			if err != nil {
   191  				// Should not happen: not recoverable.
   192  				panic(fmt.Sprintf("error initializing first thread: %v", err))
   193  			}
   194  
   195  			// Since the new thread was created with
   196  			// clone(CLONE_PTRACE), it will begin execution with
   197  			// SIGSTOP pending and with this thread as its tracer.
   198  			// (Hopefully nobody tgkilled it with a signal <
   199  			// SIGSTOP before the SIGSTOP was delivered, in which
   200  			// case that signal would be delivered before SIGSTOP.)
   201  			if sig := t.wait(stopped); sig != unix.SIGSTOP {
   202  				panic(fmt.Sprintf("error waiting for new clone: expected SIGSTOP, got %v", sig))
   203  			}
   204  
   205  			// Detach the thread.
   206  			t.detach()
   207  			t.initRegs = firstThread.initRegs
   208  
   209  			// Return the thread.
   210  			r <- t
   211  		}
   212  
   213  		// Requests should never be closed.
   214  		panic("unreachable")
   215  	}()
   216  
   217  	// Wait until error or readiness.
   218  	if err := <-errChan; err != nil {
   219  		return nil, err
   220  	}
   221  
   222  	// Ready.
   223  	sp := &subprocess{
   224  		requests: requests,
   225  		sysemuThreads: threadPool{
   226  			threads: make(map[int32]*thread),
   227  		},
   228  		syscallThreads: threadPool{
   229  			threads: make(map[int32]*thread),
   230  		},
   231  		contexts: make(map[*context]struct{}),
   232  	}
   233  
   234  	sp.unmap()
   235  	return sp, nil
   236  }
   237  
   238  // unmap unmaps non-stub regions of the process.
   239  //
   240  // This will panic on failure (which should never happen).
   241  func (s *subprocess) unmap() {
   242  	s.Unmap(0, uint64(stubStart))
   243  	if maximumUserAddress != stubEnd {
   244  		s.Unmap(hostarch.Addr(stubEnd), uint64(maximumUserAddress-stubEnd))
   245  	}
   246  }
   247  
   248  // Release kills the subprocess.
   249  //
   250  // Just kidding! We can't safely co-ordinate the detaching of all the
   251  // tracees (since the tracers are random runtime threads, and the process
   252  // won't exit until tracers have been notifier).
   253  //
   254  // Therefore we simply unmap everything in the subprocess and return it to the
   255  // globalPool. This has the added benefit of reducing creation time for new
   256  // subprocesses.
   257  func (s *subprocess) Release() {
   258  	go func() { // S/R-SAFE: Platform.
   259  		s.unmap()
   260  		globalPool.mu.Lock()
   261  		globalPool.available = append(globalPool.available, s)
   262  		globalPool.mu.Unlock()
   263  	}()
   264  }
   265  
   266  // newThread creates a new traced thread.
   267  //
   268  // Precondition: the OS thread must be locked.
   269  func (s *subprocess) newThread() *thread {
   270  	// Ask the first thread to create a new one.
   271  	r := make(chan *thread)
   272  	s.requests <- r
   273  	t := <-r
   274  
   275  	// Attach the subprocess to this one.
   276  	t.attach()
   277  
   278  	// Return the new thread, which is now bound.
   279  	return t
   280  }
   281  
   282  // attach attaches to the thread.
   283  func (t *thread) attach() {
   284  	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_ATTACH, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   285  		panic(fmt.Sprintf("unable to attach: %v", errno))
   286  	}
   287  
   288  	// PTRACE_ATTACH sends SIGSTOP, and wakes the tracee if it was already
   289  	// stopped from the SIGSTOP queued by CLONE_PTRACE (see inner loop of
   290  	// newSubprocess), so we always expect to see signal-delivery-stop with
   291  	// SIGSTOP.
   292  	if sig := t.wait(stopped); sig != unix.SIGSTOP {
   293  		panic(fmt.Sprintf("wait failed: expected SIGSTOP, got %v", sig))
   294  	}
   295  
   296  	// Initialize options.
   297  	t.init()
   298  }
   299  
   300  func (t *thread) grabInitRegs() {
   301  	// Grab registers.
   302  	//
   303  	// Note that we adjust the current register RIP value to be just before
   304  	// the current system call executed. This depends on the definition of
   305  	// the stub itself.
   306  	if err := t.getRegs(&t.initRegs); err != nil {
   307  		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
   308  	}
   309  	t.adjustInitRegsRip()
   310  }
   311  
   312  // detach detaches from the thread.
   313  //
   314  // Because the SIGSTOP is not suppressed, the thread will enter group-stop.
   315  func (t *thread) detach() {
   316  	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(unix.SIGSTOP), 0, 0); errno != 0 {
   317  		panic(fmt.Sprintf("can't detach new clone: %v", errno))
   318  	}
   319  }
   320  
// waitOutcome is used for wait below.
//
// It selects which kind of wait status thread.wait accepts; any other status
// makes wait panic.
type waitOutcome int

const (
	// stopped indicates that the process was stopped.
	stopped waitOutcome = iota

	// killed indicates that the process was killed.
	killed
)
   331  
   332  func (t *thread) dumpAndPanic(message string) {
   333  	var regs arch.Registers
   334  	message += "\n"
   335  	if err := t.getRegs(&regs); err == nil {
   336  		message += dumpRegs(&regs)
   337  	} else {
   338  		log.Warningf("unable to get registers: %v", err)
   339  	}
   340  	message += fmt.Sprintf("stubStart\t = %016x\n", stubStart)
   341  	panic(message)
   342  }
   343  
   344  func (t *thread) unexpectedStubExit() {
   345  	msg, err := t.getEventMessage()
   346  	status := unix.WaitStatus(msg)
   347  	if status.Signaled() && status.Signal() == unix.SIGKILL {
   348  		// SIGKILL can be only sent by a user or OOM-killer. In both
   349  		// these cases, we don't need to panic. There is no reasons to
   350  		// think that something wrong in gVisor.
   351  		log.Warningf("The ptrace stub process %v has been killed by SIGKILL.", t.tgid)
   352  		pid := os.Getpid()
   353  		unix.Tgkill(pid, pid, unix.Signal(unix.SIGKILL))
   354  	}
   355  	t.dumpAndPanic(fmt.Sprintf("wait failed: the process %d:%d exited: %x (err %v)", t.tgid, t.tid, msg, err))
   356  }
   357  
   358  // wait waits for a stop event.
   359  //
   360  // Precondition: outcome is a valid waitOutcome.
   361  func (t *thread) wait(outcome waitOutcome) unix.Signal {
   362  	var status unix.WaitStatus
   363  
   364  	for {
   365  		r, err := unix.Wait4(int(t.tid), &status, unix.WALL|unix.WUNTRACED, nil)
   366  		if err == unix.EINTR || err == unix.EAGAIN {
   367  			// Wait was interrupted; wait again.
   368  			continue
   369  		} else if err != nil {
   370  			panic(fmt.Sprintf("ptrace wait failed: %v", err))
   371  		}
   372  		if int(r) != int(t.tid) {
   373  			panic(fmt.Sprintf("ptrace wait returned %v, expected %v", r, t.tid))
   374  		}
   375  		switch outcome {
   376  		case stopped:
   377  			if !status.Stopped() {
   378  				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted stopped", status))
   379  			}
   380  			stopSig := status.StopSignal()
   381  			if stopSig == 0 {
   382  				continue // Spurious stop.
   383  			}
   384  			if stopSig == unix.SIGTRAP {
   385  				if status.TrapCause() == unix.PTRACE_EVENT_EXIT {
   386  					t.unexpectedStubExit()
   387  				}
   388  				// Re-encode the trap cause the way it's expected.
   389  				return stopSig | unix.Signal(status.TrapCause()<<8)
   390  			}
   391  			// Not a trap signal.
   392  			return stopSig
   393  		case killed:
   394  			if !status.Exited() && !status.Signaled() {
   395  				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted exited", status))
   396  			}
   397  			return unix.Signal(status.ExitStatus())
   398  		default:
   399  			// Should not happen.
   400  			t.dumpAndPanic(fmt.Sprintf("unknown outcome: %v", outcome))
   401  		}
   402  	}
   403  }
   404  
   405  // destroy kills the thread.
   406  //
   407  // Note that this should not be used in the general case; the death of threads
   408  // will typically cause the death of the parent. This is a utility method for
   409  // manually created threads.
   410  func (t *thread) destroy() {
   411  	t.detach()
   412  	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(unix.SIGKILL))
   413  	t.wait(killed)
   414  }
   415  
   416  // init initializes trace options.
   417  func (t *thread) init() {
   418  	// Set the TRACESYSGOOD option to differentiate real SIGTRAP.
   419  	// set PTRACE_O_EXITKILL to ensure that the unexpected exit of the
   420  	// sentry will immediately kill the associated stubs.
   421  	const PTRACE_O_EXITKILL = 0x100000
   422  	_, _, errno := unix.RawSyscall6(
   423  		unix.SYS_PTRACE,
   424  		unix.PTRACE_SETOPTIONS,
   425  		uintptr(t.tid),
   426  		0,
   427  		unix.PTRACE_O_TRACESYSGOOD|unix.PTRACE_O_TRACEEXIT|PTRACE_O_EXITKILL,
   428  		0, 0)
   429  	if errno != 0 {
   430  		panic(fmt.Sprintf("ptrace set options failed: %v", errno))
   431  	}
   432  }
   433  
   434  // syscall executes a system call cycle in the traced context.
   435  //
   436  // This is _not_ for use by application system calls, rather it is for use when
   437  // a system call must be injected into the remote context (e.g. mmap, munmap).
   438  // Note that clones are handled separately.
   439  func (t *thread) syscall(regs *arch.Registers) (uintptr, error) {
   440  	// Set registers.
   441  	if err := t.setRegs(regs); err != nil {
   442  		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
   443  	}
   444  
   445  	for {
   446  		// Execute the syscall instruction. The task has to stop on the
   447  		// trap instruction which is right after the syscall
   448  		// instruction.
   449  		if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_CONT, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   450  			panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
   451  		}
   452  
   453  		sig := t.wait(stopped)
   454  		if sig == unix.SIGTRAP {
   455  			// Reached syscall-enter-stop.
   456  			break
   457  		} else {
   458  			// Some other signal caused a thread stop; ignore.
   459  			if sig != unix.SIGSTOP && sig != unix.SIGCHLD {
   460  				log.Warningf("The thread %d:%d has been interrupted by %d", t.tgid, t.tid, sig)
   461  			}
   462  			continue
   463  		}
   464  	}
   465  
   466  	// Grab registers.
   467  	if err := t.getRegs(regs); err != nil {
   468  		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
   469  	}
   470  
   471  	return syscallReturnValue(regs)
   472  }
   473  
   474  // syscallIgnoreInterrupt ignores interrupts on the system call thread and
   475  // restarts the syscall if the kernel indicates that should happen.
   476  func (t *thread) syscallIgnoreInterrupt(
   477  	initRegs *arch.Registers,
   478  	sysno uintptr,
   479  	args ...arch.SyscallArgument) (uintptr, error) {
   480  	for {
   481  		regs := createSyscallRegs(initRegs, sysno, args...)
   482  		rval, err := t.syscall(&regs)
   483  		switch err {
   484  		case ERESTARTSYS:
   485  			continue
   486  		case ERESTARTNOINTR:
   487  			continue
   488  		case ERESTARTNOHAND:
   489  			continue
   490  		default:
   491  			return rval, err
   492  		}
   493  	}
   494  }
   495  
   496  // NotifyInterrupt implements interrupt.Receiver.NotifyInterrupt.
   497  func (t *thread) NotifyInterrupt() {
   498  	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(platform.SignalInterrupt))
   499  }
   500  
// switchToApp is called from the main SwitchToApp entrypoint.
//
// It loads the register, floating point and TLS state from ac into the
// sysemu thread bound to the calling OS thread, resumes that thread, and
// copies the state back into ac once the thread stops for a system call or
// for a signal that should be reported.
//
// If an interrupt is already pending when the function is entered, the
// application is not run at all: c.signalInfo is set to
// platform.SignalInterrupt and false is returned. For a reported signal,
// c.signalInfo holds the signal information read from the thread.
//
// Any ptrace failure panics.
//
// This function returns true on a system call, false on a signal.
func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
	// Lock the thread for ptrace operations.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Extract floating point state.
	fpState := ac.FloatingPointData()
	fpLen, _ := ac.FeatureSet().ExtendedStateSize()
	useXsave := ac.FeatureSet().UseXsave()

	// Grab our thread from the pool.
	currentTID := int32(procid.Current())
	t := s.sysemuThreads.lookupOrCreate(currentTID, s.newThread)

	// Reset necessary registers.
	regs := &ac.StateData().Regs
	t.resetSysemuRegs(regs)

	// Extract TLS register
	tls := uint64(ac.TLS())

	// Check for interrupts, and ensure that future interrupts will signal t.
	if !c.interrupt.Enable(t) {
		// Pending interrupt; simulate.
		c.signalInfo = linux.SignalInfo{Signo: int32(platform.SignalInterrupt)}
		return false
	}
	defer c.interrupt.Disable()

	// Set registers.
	if err := t.setRegs(regs); err != nil {
		panic(fmt.Sprintf("ptrace set regs (%+v) failed: %v", regs, err))
	}
	if err := t.setFPRegs(fpState, uint64(fpLen), useXsave); err != nil {
		panic(fmt.Sprintf("ptrace set fpregs (%+v) failed: %v", fpState, err))
	}
	if err := t.setTLS(&tls); err != nil {
		panic(fmt.Sprintf("ptrace set tls (%+v) failed: %v", tls, err))
	}

	// Each iteration resumes the thread once. The loop repeats for stops
	// that are not reported to the caller: SIGSTOP, and signals that were
	// neither raised by the kernel nor sent by this process.
	for {
		// Start running until the next system call.
		if isSingleStepping(regs) {
			if _, _, errno := unix.RawSyscall6(
				unix.SYS_PTRACE,
				unix.PTRACE_SYSEMU_SINGLESTEP,
				uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
				panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
			}
		} else {
			if _, _, errno := unix.RawSyscall6(
				unix.SYS_PTRACE,
				unix.PTRACE_SYSEMU,
				uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
				panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
			}
		}

		// Wait for the syscall-enter stop.
		sig := t.wait(stopped)

		if sig == unix.SIGSTOP {
			// SIGSTOP was delivered to another thread in the same thread
			// group, which initiated another group stop. Just ignore it.
			continue
		}

		// Refresh all registers.
		if err := t.getRegs(regs); err != nil {
			panic(fmt.Sprintf("ptrace get regs failed: %v", err))
		}
		if err := t.getFPRegs(fpState, uint64(fpLen), useXsave); err != nil {
			panic(fmt.Sprintf("ptrace get fpregs failed: %v", err))
		}
		if err := t.getTLS(&tls); err != nil {
			panic(fmt.Sprintf("ptrace get tls failed: %v", err))
		}
		if !ac.SetTLS(uintptr(tls)) {
			panic(fmt.Sprintf("tls value %v is invalid", tls))
		}

		// Is it a system call?
		if sig == (syscallEvent | unix.SIGTRAP) {
			s.arm64SyscallWorkaround(t, regs)

			// Ensure registers are sane.
			updateSyscallRegs(regs)
			return true
		}

		// Grab signal information.
		if err := t.getSignalInfo(&c.signalInfo); err != nil {
			// Should never happen.
			panic(fmt.Sprintf("ptrace get signal info failed: %v", err))
		}

		// We have a signal. We verify however, that the signal was
		// either delivered from the kernel or from this process. We
		// don't respect other signals.
		if c.signalInfo.Code > 0 {
			// The signal was generated by the kernel. We inspect
			// the signal information, and may patch it in order to
			// facilitate vsyscall emulation. See patchSignalInfo.
			patchSignalInfo(regs, &c.signalInfo)
			return false
		} else if c.signalInfo.Code <= 0 && c.signalInfo.PID() == int32(os.Getpid()) {
			// The signal was generated by this process. That means
			// that it was an interrupt or something else that we
			// should bail for. Note that we ignore signals
			// generated by other processes.
			return false
		}
	}
}
   618  
   619  // syscall executes the given system call without handling interruptions.
   620  func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintptr, error) {
   621  	// Grab a thread.
   622  	runtime.LockOSThread()
   623  	defer runtime.UnlockOSThread()
   624  	currentTID := int32(procid.Current())
   625  	t := s.syscallThreads.lookupOrCreate(currentTID, s.newThread)
   626  
   627  	return t.syscallIgnoreInterrupt(&t.initRegs, sysno, args...)
   628  }
   629  
   630  // MapFile implements platform.AddressSpace.MapFile.
   631  func (s *subprocess) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
   632  	var flags int
   633  	if precommit {
   634  		flags |= unix.MAP_POPULATE
   635  	}
   636  	_, err := s.syscall(
   637  		unix.SYS_MMAP,
   638  		arch.SyscallArgument{Value: uintptr(addr)},
   639  		arch.SyscallArgument{Value: uintptr(fr.Length())},
   640  		arch.SyscallArgument{Value: uintptr(at.Prot())},
   641  		arch.SyscallArgument{Value: uintptr(flags | unix.MAP_SHARED | unix.MAP_FIXED)},
   642  		arch.SyscallArgument{Value: uintptr(f.FD())},
   643  		arch.SyscallArgument{Value: uintptr(fr.Start)})
   644  	return err
   645  }
   646  
   647  // Unmap implements platform.AddressSpace.Unmap.
   648  func (s *subprocess) Unmap(addr hostarch.Addr, length uint64) {
   649  	ar, ok := addr.ToRange(length)
   650  	if !ok {
   651  		panic(fmt.Sprintf("addr %#x + length %#x overflows", addr, length))
   652  	}
   653  	s.mu.Lock()
   654  	for c := range s.contexts {
   655  		c.mu.Lock()
   656  		if c.lastFaultSP == s && ar.Contains(c.lastFaultAddr) {
   657  			// Forget the last fault so that if c faults again, the fault isn't
   658  			// incorrectly reported as a write fault. If this is being called
   659  			// due to munmap() of the corresponding vma, handling of the second
   660  			// fault will fail anyway.
   661  			c.lastFaultSP = nil
   662  			delete(s.contexts, c)
   663  		}
   664  		c.mu.Unlock()
   665  	}
   666  	s.mu.Unlock()
   667  	_, err := s.syscall(
   668  		unix.SYS_MUNMAP,
   669  		arch.SyscallArgument{Value: uintptr(addr)},
   670  		arch.SyscallArgument{Value: uintptr(length)})
   671  	if err != nil {
   672  		// We never expect this to happen.
   673  		panic(fmt.Sprintf("munmap(%x, %x)) failed: %v", addr, length, err))
   674  	}
   675  }
   676  
// PreFork implements platform.AddressSpace.PreFork.
//
// It is a no-op for subprocess.
func (s *subprocess) PreFork() {}
   679  
// PostFork implements platform.AddressSpace.PostFork.
//
// It is a no-op for subprocess.
func (s *subprocess) PostFork() {}