github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/ptrace/subprocess.go

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ptrace
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"runtime"
    21  
     22  	"github.com/metacubex/gvisor/pkg/abi/linux"
     23  	"github.com/metacubex/gvisor/pkg/hostarch"
     24  	"github.com/metacubex/gvisor/pkg/hosttid"
     25  	"github.com/metacubex/gvisor/pkg/log"
     26  	"github.com/metacubex/gvisor/pkg/sentry/arch"
     27  	"github.com/metacubex/gvisor/pkg/sentry/memmap"
     28  	"github.com/metacubex/gvisor/pkg/sentry/platform"
     29  	"github.com/metacubex/gvisor/pkg/sync"
     30  	"golang.org/x/sys/unix"
    31  )
    32  
    33  var (
    34  	// maximumUserAddress is the largest possible user address.
    35  	maximumUserAddress = linux.TaskSize
    36  
    37  	// stubInitAddress is the initial attempt link address for the stub.
    38  	stubInitAddress = linux.TaskSize
    39  )
    40  
    41  // Linux kernel errnos which "should never be seen by user programs", but will
    42  // be revealed to ptrace syscall exit tracing.
    43  //
    44  // These constants are only used in subprocess.go.
    45  const (
    46  	ERESTARTSYS    = unix.Errno(512)
    47  	ERESTARTNOINTR = unix.Errno(513)
    48  	ERESTARTNOHAND = unix.Errno(514)
    49  )
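
         // isRestartErrno is a purely illustrative sketch (nothing in this package
         // calls it): these errnos show up in the syscall return value at a ptrace
         // stop and simply mean "retry the syscall", which is exactly what
         // syscallIgnoreInterrupt below does.
         func isRestartErrno(err error) bool {
         	switch err {
         	case ERESTARTSYS, ERESTARTNOINTR, ERESTARTNOHAND:
         		return true
         	default:
         		return false
         	}
         }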
    50  
    51  // globalPool exists to solve two distinct problems:
    52  //
    53  // 1) Subprocesses can't always be killed properly (see Release).
    54  //
    55  // 2) Any seccomp filters that have been installed will apply to subprocesses
    56  // created here. Therefore we use the intermediary (master), which is created
    57  // on initialization of the platform.
    58  var globalPool struct {
    59  	mu        sync.Mutex
    60  	master    *subprocess
    61  	available []*subprocess
    62  }
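
         // For orientation: the master subprocess is created when the platform is
         // initialized; newSubprocess first tries to pop an entry from
         // globalPool.available and only spawns a fresh stub process when the pool
         // is empty; Release unmaps a subprocess and pushes it back onto the pool
         // instead of killing it.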
    63  
    64  // thread is a traced thread; it is a thread identifier.
    65  //
    66  // This is a convenience type for defining ptrace operations.
    67  type thread struct {
    68  	tgid int32
    69  	tid  int32
    70  	cpu  uint32
    71  
    72  	// initRegs are the initial registers for the first thread.
    73  	//
    74  	// These are used for the register set for system calls.
    75  	initRegs arch.Registers
    76  }
    77  
    78  // threadPool is a collection of threads.
    79  type threadPool struct {
    80  	// mu protects below.
    81  	mu sync.RWMutex
    82  
    83  	// threads is the collection of threads.
    84  	//
     85  	// This map is indexed by system TID (the calling thread), which will
    86  	// be the tracer for the given *thread, and therefore capable of using
    87  	// relevant ptrace calls.
    88  	threads map[int32]*thread
    89  }
    90  
    91  // lookupOrCreate looks up a given thread or creates one.
    92  //
    93  // newThread will generally be subprocess.newThread.
    94  //
    95  // Precondition: the runtime OS thread must be locked.
    96  func (tp *threadPool) lookupOrCreate(currentTID int32, newThread func() *thread) *thread {
     97  	// The overwhelmingly common case is that the thread is already created.
    98  	// Optimistically attempt the lookup by only locking for reading.
    99  	tp.mu.RLock()
   100  	t, ok := tp.threads[currentTID]
   101  	tp.mu.RUnlock()
   102  	if ok {
   103  		return t
   104  	}
   105  
   106  	tp.mu.Lock()
   107  	defer tp.mu.Unlock()
   108  
   109  	// Another goroutine might have created the thread for currentTID in between
   110  	// mu.RUnlock() and mu.Lock().
   111  	if t, ok = tp.threads[currentTID]; ok {
   112  		return t
   113  	}
   114  
   115  	// Before creating a new thread, see if we can find a thread
   116  	// whose system tid has disappeared.
   117  	//
   118  	// TODO(b/77216482): Other parts of this package depend on
   119  	// threads never exiting.
   120  	for origTID, t := range tp.threads {
   121  		// Signal zero is an easy existence check.
   122  		if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil {
   123  			// This thread has been abandoned; reuse it.
   124  			delete(tp.threads, origTID)
   125  			tp.threads[currentTID] = t
   126  			return t
   127  		}
   128  	}
   129  
   130  	// Create a new thread.
   131  	t = newThread()
   132  	tp.threads[currentTID] = t
   133  	return t
   134  }
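
         // The pattern the real callers (switchToApp and subprocess.syscall below)
         // follow when grabbing a traced thread for the current OS thread looks
         // roughly like this; the OS thread must stay locked while the returned
         // thread is in use, since it is that thread's ptrace tracer:
         //
         //	runtime.LockOSThread()
         //	defer runtime.UnlockOSThread()
         //	currentTID := int32(hosttid.Current())
         //	t := s.sysemuThreads.lookupOrCreate(currentTID, s.newThread)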
   135  
   136  // subprocess is a collection of threads being traced.
   137  type subprocess struct {
   138  	platform.NoAddressSpaceIO
   139  
   140  	// requests is used to signal creation of new threads.
   141  	requests chan chan *thread
   142  
   143  	// sysemuThreads are reserved for emulation.
   144  	sysemuThreads threadPool
   145  
   146  	// syscallThreads are reserved for syscalls (except clone, which is
   147  	// handled in the dedicated goroutine corresponding to requests above).
   148  	syscallThreads threadPool
   149  
   150  	// mu protects the following fields.
   151  	mu sync.Mutex
   152  
   153  	// contexts is the set of contexts for which it's possible that
   154  	// context.lastFaultSP == this subprocess.
   155  	contexts map[*context]struct{}
   156  }
   157  
   158  // newSubprocess returns a usable subprocess.
   159  //
   160  // This will either be a newly created subprocess, or one from the global pool.
    161  // The create function will be called in the former case, which is guaranteed
   162  // to happen with the runtime thread locked.
   163  func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
   164  	// See Release.
   165  	globalPool.mu.Lock()
   166  	if len(globalPool.available) > 0 {
   167  		sp := globalPool.available[len(globalPool.available)-1]
   168  		globalPool.available = globalPool.available[:len(globalPool.available)-1]
   169  		globalPool.mu.Unlock()
   170  		return sp, nil
   171  	}
   172  	globalPool.mu.Unlock()
   173  
    174  	// The following goroutine is responsible for creating the first traced
    175  	// thread, and responding to requests to make additional threads in the
    176  	// traced process. Note that the request channel is never closed: Release
    177  	// returns the subprocess to the globalPool instead of killing it.
   178  	errChan := make(chan error)
   179  	requests := make(chan chan *thread)
   180  	go func() { // S/R-SAFE: Platform-related.
   181  		runtime.LockOSThread()
   182  		defer runtime.UnlockOSThread()
   183  
   184  		// Initialize the first thread.
   185  		firstThread, err := create()
   186  		if err != nil {
   187  			errChan <- err
   188  			return
   189  		}
   190  		firstThread.grabInitRegs()
   191  
   192  		// Ready to handle requests.
   193  		errChan <- nil
   194  
   195  		// Wait for requests to create threads.
   196  		for r := range requests {
   197  			t, err := firstThread.clone()
   198  			if err != nil {
   199  				// Should not happen: not recoverable.
    200  				panic(fmt.Sprintf("error cloning new thread: %v", err))
   201  			}
   202  
   203  			// Since the new thread was created with
   204  			// clone(CLONE_PTRACE), it will begin execution with
   205  			// SIGSTOP pending and with this thread as its tracer.
   206  			// (Hopefully nobody tgkilled it with a signal <
   207  			// SIGSTOP before the SIGSTOP was delivered, in which
   208  			// case that signal would be delivered before SIGSTOP.)
   209  			if sig := t.wait(stopped); sig != unix.SIGSTOP {
   210  				panic(fmt.Sprintf("error waiting for new clone: expected SIGSTOP, got %v", sig))
   211  			}
   212  
   213  			// Detach the thread.
   214  			t.detach()
   215  			t.initRegs = firstThread.initRegs
   216  
   217  			// Return the thread.
   218  			r <- t
   219  		}
   220  
   221  		// Requests should never be closed.
   222  		panic("unreachable")
   223  	}()
   224  
   225  	// Wait until error or readiness.
   226  	if err := <-errChan; err != nil {
   227  		return nil, err
   228  	}
   229  
   230  	// Ready.
   231  	sp := &subprocess{
   232  		requests: requests,
   233  		sysemuThreads: threadPool{
   234  			threads: make(map[int32]*thread),
   235  		},
   236  		syscallThreads: threadPool{
   237  			threads: make(map[int32]*thread),
   238  		},
   239  		contexts: make(map[*context]struct{}),
   240  	}
   241  
   242  	sp.unmap()
   243  	return sp, nil
   244  }
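
         // The create argument is supplied elsewhere in this package (in the
         // architecture/OS-specific files); a typical call is presumably of the
         // form newSubprocess(createStub), where createStub forks and traces the
         // initial stub thread, but that helper lives outside this file.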
   245  
   246  // unmap unmaps non-stub regions of the process.
   247  //
   248  // This will panic on failure (which should never happen).
   249  func (s *subprocess) unmap() {
   250  	s.Unmap(0, uint64(stubStart))
   251  	if maximumUserAddress != stubEnd {
   252  		s.Unmap(hostarch.Addr(stubEnd), uint64(maximumUserAddress-stubEnd))
   253  	}
   254  }
   255  
   256  // Release kills the subprocess.
   257  //
   258  // Just kidding! We can't safely coordinate the detaching of all the
   259  // tracees (since the tracers are random runtime threads, and the process
    260  // won't exit until tracers have been notified).
   261  //
   262  // Therefore we simply unmap everything in the subprocess and return it to the
   263  // globalPool. This has the added benefit of reducing creation time for new
   264  // subprocesses.
   265  func (s *subprocess) Release() {
   266  	go func() { // S/R-SAFE: Platform.
   267  		s.unmap()
   268  		globalPool.mu.Lock()
   269  		globalPool.available = append(globalPool.available, s)
   270  		globalPool.mu.Unlock()
   271  	}()
   272  }
   273  
   274  // newThread creates a new traced thread.
   275  //
   276  // Precondition: the OS thread must be locked.
   277  func (s *subprocess) newThread() *thread {
   278  	// Ask the first thread to create a new one.
   279  	r := make(chan *thread)
   280  	s.requests <- r
   281  	t := <-r
   282  
   283  	// Attach the subprocess to this one.
   284  	t.attach()
   285  
   286  	// Return the new thread, which is now bound.
   287  	return t
   288  }
   289  
   290  // attach attaches to the thread.
   291  func (t *thread) attach() {
   292  	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_ATTACH, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   293  		panic(fmt.Sprintf("unable to attach: %v", errno))
   294  	}
   295  
   296  	// PTRACE_ATTACH sends SIGSTOP, and wakes the tracee if it was already
   297  	// stopped from the SIGSTOP queued by CLONE_PTRACE (see inner loop of
   298  	// newSubprocess), so we always expect to see signal-delivery-stop with
   299  	// SIGSTOP.
   300  	if sig := t.wait(stopped); sig != unix.SIGSTOP {
   301  		panic(fmt.Sprintf("wait failed: expected SIGSTOP, got %v", sig))
   302  	}
   303  
   304  	// Initialize options.
   305  	t.init()
   306  }
   307  
   308  func (t *thread) grabInitRegs() {
   309  	// Grab registers.
   310  	//
   311  	// Note that we adjust the current register RIP value to be just before
   312  	// the current system call executed. This depends on the definition of
   313  	// the stub itself.
   314  	if err := t.getRegs(&t.initRegs); err != nil {
   315  		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
   316  	}
   317  	t.adjustInitRegsRip()
   318  }
   319  
   320  // detach detaches from the thread.
   321  //
   322  // Because the SIGSTOP is not suppressed, the thread will enter group-stop.
   323  func (t *thread) detach() {
   324  	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(unix.SIGSTOP), 0, 0); errno != 0 {
   325  		panic(fmt.Sprintf("can't detach new clone: %v", errno))
   326  	}
   327  }
   328  
   329  // waitOutcome is used for wait below.
   330  type waitOutcome int
   331  
   332  const (
   333  	// stopped indicates that the process was stopped.
   334  	stopped waitOutcome = iota
   335  
   336  	// killed indicates that the process was killed.
   337  	killed
   338  )
   339  
   340  func (t *thread) dumpAndPanic(message string) {
   341  	var regs arch.Registers
   342  	message += "\n"
   343  	if err := t.getRegs(&regs); err == nil {
   344  		message += dumpRegs(&regs)
   345  	} else {
   346  		log.Warningf("unable to get registers: %v", err)
   347  	}
   348  	message += fmt.Sprintf("stubStart\t = %016x\n", stubStart)
   349  	panic(message)
   350  }
   351  
   352  func (t *thread) unexpectedStubExit() {
   353  	msg, err := t.getEventMessage()
   354  	status := unix.WaitStatus(msg)
   355  	if status.Signaled() && status.Signal() == unix.SIGKILL {
    356  		// SIGKILL can only be sent by a user or the OOM-killer. In both
    357  		// cases, we don't need to panic; there is no reason to think
    358  		// that something is wrong in gVisor.
   359  		log.Warningf("The ptrace stub process %v has been killed by SIGKILL.", t.tgid)
   360  		pid := os.Getpid()
   361  		unix.Tgkill(pid, pid, unix.Signal(unix.SIGKILL))
   362  	}
   363  	t.dumpAndPanic(fmt.Sprintf("wait failed: the process %d:%d exited: %x (err %v)", t.tgid, t.tid, msg, err))
   364  }
   365  
   366  // wait waits for a stop event.
   367  //
   368  // Precondition: outcome is a valid waitOutcome.
   369  func (t *thread) wait(outcome waitOutcome) unix.Signal {
   370  	var status unix.WaitStatus
   371  
   372  	for {
   373  		r, err := unix.Wait4(int(t.tid), &status, unix.WALL|unix.WUNTRACED, nil)
   374  		if err == unix.EINTR || err == unix.EAGAIN {
   375  			// Wait was interrupted; wait again.
   376  			continue
   377  		} else if err != nil {
   378  			panic(fmt.Sprintf("ptrace wait failed: %v", err))
   379  		}
   380  		if int(r) != int(t.tid) {
   381  			panic(fmt.Sprintf("ptrace wait returned %v, expected %v", r, t.tid))
   382  		}
   383  		switch outcome {
   384  		case stopped:
   385  			if !status.Stopped() {
   386  				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted stopped", status))
   387  			}
   388  			stopSig := status.StopSignal()
   389  			if stopSig == 0 {
   390  				continue // Spurious stop.
   391  			}
   392  			if stopSig == unix.SIGTRAP {
   393  				if status.TrapCause() == unix.PTRACE_EVENT_EXIT {
   394  					t.unexpectedStubExit()
   395  				}
   396  				// Re-encode the trap cause the way it's expected.
   397  				return stopSig | unix.Signal(status.TrapCause()<<8)
   398  			}
   399  			// Not a trap signal.
   400  			return stopSig
   401  		case killed:
   402  			if !status.Exited() && !status.Signaled() {
   403  				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted exited", status))
   404  			}
   405  			return unix.Signal(status.ExitStatus())
   406  		default:
   407  			// Should not happen.
   408  			t.dumpAndPanic(fmt.Sprintf("unknown outcome: %v", outcome))
   409  		}
   410  	}
   411  }
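
         // decodeStop is an illustrative sketch (nothing in this file calls it): it
         // undoes the encoding produced by wait above, in which a ptrace event stop
         // is reported as SIGTRAP with the trap cause shifted into the high bits.
         func decodeStop(sig unix.Signal) (stopSig unix.Signal, trapCause int) {
         	return sig & 0xff, int(sig) >> 8
         }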
   412  
   413  // destroy kills the thread.
   414  //
   415  // Note that this should not be used in the general case; the death of threads
   416  // will typically cause the death of the parent. This is a utility method for
   417  // manually created threads.
   418  func (t *thread) destroy() {
   419  	t.detach()
   420  	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(unix.SIGKILL))
   421  	t.wait(killed)
   422  }
   423  
   424  // init initializes trace options.
   425  func (t *thread) init() {
    426  	// Set the TRACESYSGOOD option to differentiate real SIGTRAPs from
    427  	// syscall stops, and set PTRACE_O_EXITKILL to ensure that an unexpected
    428  	// exit of the sentry immediately kills the associated stubs.
   429  	const PTRACE_O_EXITKILL = 0x100000
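
         	// In detail: PTRACE_O_TRACESYSGOOD reports syscall stops with bit 0x80
         	// set in the stop signal (so they can be told apart from genuine
         	// SIGTRAPs), PTRACE_O_TRACEEXIT makes the stub stop with
         	// PTRACE_EVENT_EXIT before exiting (see unexpectedStubExit), and
         	// PTRACE_O_EXITKILL has the kernel SIGKILL the stub if its tracer dies.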
   430  	_, _, errno := unix.RawSyscall6(
   431  		unix.SYS_PTRACE,
   432  		unix.PTRACE_SETOPTIONS,
   433  		uintptr(t.tid),
   434  		0,
   435  		unix.PTRACE_O_TRACESYSGOOD|unix.PTRACE_O_TRACEEXIT|PTRACE_O_EXITKILL,
   436  		0, 0)
   437  	if errno != 0 {
   438  		panic(fmt.Sprintf("ptrace set options failed: %v", errno))
   439  	}
   440  }
   441  
   442  // syscall executes a system call cycle in the traced context.
   443  //
   444  // This is _not_ for use by application system calls, rather it is for use when
   445  // a system call must be injected into the remote context (e.g. mmap, munmap).
   446  // Note that clones are handled separately.
   447  func (t *thread) syscall(regs *arch.Registers) (uintptr, error) {
   448  	// Set registers.
   449  	if err := t.setRegs(regs); err != nil {
   450  		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
   451  	}
   452  
   453  	for {
   454  		// Execute the syscall instruction. The task has to stop on the
   455  		// trap instruction which is right after the syscall
   456  		// instruction.
   457  		if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_CONT, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
    458  			panic(fmt.Sprintf("ptrace cont failed: %v", errno))
   459  		}
   460  
   461  		sig := t.wait(stopped)
   462  		if sig == unix.SIGTRAP {
    463  			// The stub stopped at the trap instruction after the syscall.
   464  			break
   465  		} else {
   466  			// Some other signal caused a thread stop; ignore.
   467  			if sig != unix.SIGSTOP && sig != unix.SIGCHLD {
   468  				log.Warningf("The thread %d:%d has been interrupted by %d", t.tgid, t.tid, sig)
   469  			}
   470  			continue
   471  		}
   472  	}
   473  
   474  	// Grab registers.
   475  	if err := t.getRegs(regs); err != nil {
   476  		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
   477  	}
   478  
   479  	return syscallReturnValue(regs)
   480  }
   481  
   482  // syscallIgnoreInterrupt ignores interrupts on the system call thread and
   483  // restarts the syscall if the kernel indicates that should happen.
   484  func (t *thread) syscallIgnoreInterrupt(
   485  	initRegs *arch.Registers,
   486  	sysno uintptr,
   487  	args ...arch.SyscallArgument) (uintptr, error) {
   488  	for {
   489  		regs := createSyscallRegs(initRegs, sysno, args...)
   490  		rval, err := t.syscall(&regs)
   491  		switch err {
   492  		case ERESTARTSYS:
   493  			continue
   494  		case ERESTARTNOINTR:
   495  			continue
   496  		case ERESTARTNOHAND:
   497  			continue
   498  		default:
   499  			return rval, err
   500  		}
   501  	}
   502  }
   503  
   504  // NotifyInterrupt implements interrupt.Receiver.NotifyInterrupt.
   505  func (t *thread) NotifyInterrupt() {
   506  	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(platform.SignalInterrupt))
   507  }
   508  
   509  // switchToApp is called from the main SwitchToApp entrypoint.
   510  //
   511  // This function returns true on a system call, false on a signal.
   512  func (s *subprocess) switchToApp(c *context, ac *arch.Context64) bool {
   513  	// Lock the thread for ptrace operations.
   514  	runtime.LockOSThread()
   515  	defer runtime.UnlockOSThread()
   516  
   517  	// Extract floating point state.
   518  	fpState := ac.FloatingPointData()
   519  
   520  	// Grab our thread from the pool.
   521  	currentTID := int32(hosttid.Current())
   522  	t := s.sysemuThreads.lookupOrCreate(currentTID, s.newThread)
   523  
   524  	// Reset necessary registers.
   525  	regs := &ac.StateData().Regs
   526  	t.resetSysemuRegs(regs)
   527  
   528  	// Extract TLS register
   529  	tls := uint64(ac.TLS())
   530  
   531  	// Check for interrupts, and ensure that future interrupts will signal t.
   532  	if !c.interrupt.Enable(t) {
   533  		// Pending interrupt; simulate.
   534  		c.signalInfo = linux.SignalInfo{Signo: int32(platform.SignalInterrupt)}
   535  		return false
   536  	}
   537  	defer c.interrupt.Disable()
   538  
   539  	// Set registers.
   540  	if err := t.setRegs(regs); err != nil {
   541  		panic(fmt.Sprintf("ptrace set regs (%+v) failed: %v", regs, err))
   542  	}
   543  	if err := t.setFPRegs(fpState, &c.archContext); err != nil {
   544  		panic(fmt.Sprintf("ptrace set fpregs (%+v) failed: %v", fpState, err))
   545  	}
   546  	if err := t.setTLS(&tls); err != nil {
   547  		panic(fmt.Sprintf("ptrace set tls (%+v) failed: %v", tls, err))
   548  	}
   549  
   550  	for {
   551  		// Start running until the next system call.
   552  		if isSingleStepping(regs) {
   553  			if _, _, errno := unix.RawSyscall6(
   554  				unix.SYS_PTRACE,
   555  				unix.PTRACE_SYSEMU_SINGLESTEP,
   556  				uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   557  				panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
   558  			}
   559  		} else {
   560  			if _, _, errno := unix.RawSyscall6(
   561  				unix.SYS_PTRACE,
   562  				unix.PTRACE_SYSEMU,
   563  				uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   564  				panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
   565  			}
   566  		}
   567  
   568  		// Wait for the syscall-enter stop.
   569  		sig := t.wait(stopped)
   570  
   571  		if sig == unix.SIGSTOP {
   572  			// SIGSTOP was delivered to another thread in the same thread
   573  			// group, which initiated another group stop. Just ignore it.
   574  			continue
   575  		}
   576  
   577  		// Refresh all registers.
   578  		if err := t.getRegs(regs); err != nil {
   579  			panic(fmt.Sprintf("ptrace get regs failed: %v", err))
   580  		}
   581  		if err := t.getFPRegs(fpState, &c.archContext); err != nil {
   582  			panic(fmt.Sprintf("ptrace get fpregs failed: %v", err))
   583  		}
   584  		if err := t.getTLS(&tls); err != nil {
   585  			panic(fmt.Sprintf("ptrace get tls failed: %v", err))
   586  		}
   587  		if !ac.SetTLS(uintptr(tls)) {
   588  			panic(fmt.Sprintf("tls value %v is invalid", tls))
   589  		}
   590  
   591  		// Is it a system call?
   592  		if sig == (syscallEvent | unix.SIGTRAP) {
   593  			s.arm64SyscallWorkaround(t, regs)
   594  
   595  			// Ensure registers are sane.
   596  			updateSyscallRegs(regs)
   597  			return true
   598  		}
   599  
   600  		// Grab signal information.
   601  		if err := t.getSignalInfo(&c.signalInfo); err != nil {
   602  			// Should never happen.
   603  			panic(fmt.Sprintf("ptrace get signal info failed: %v", err))
   604  		}
   605  
    606  		// We have a signal. We verify, however, that the signal was
    607  		// generated either by the kernel or by this process; we don't
    608  		// respect signals from other processes.
   609  		if c.signalInfo.Code > 0 {
   610  			// The signal was generated by the kernel. We inspect
   611  			// the signal information, and may patch it in order to
   612  			// facilitate vsyscall emulation. See patchSignalInfo.
   613  			patchSignalInfo(regs, &c.signalInfo)
   614  			return false
   615  		} else if c.signalInfo.Code <= 0 && c.signalInfo.PID() == int32(os.Getpid()) {
   616  			// The signal was generated by this process. That means
   617  			// that it was an interrupt or something else that we
   618  			// should bail for. Note that we ignore signals
   619  			// generated by other processes.
   620  			return false
   621  		}
   622  	}
   623  }
   624  
   625  // syscall executes the given system call without handling interruptions.
   626  func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintptr, error) {
   627  	// Grab a thread.
   628  	runtime.LockOSThread()
   629  	defer runtime.UnlockOSThread()
   630  	currentTID := int32(hosttid.Current())
   631  	t := s.syscallThreads.lookupOrCreate(currentTID, s.newThread)
   632  
   633  	return t.syscallIgnoreInterrupt(&t.initRegs, sysno, args...)
   634  }
   635  
   636  // MapFile implements platform.AddressSpace.MapFile.
   637  func (s *subprocess) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
   638  	var flags int
   639  	if precommit {
   640  		flags |= unix.MAP_POPULATE
   641  	}
   642  	_, err := s.syscall(
   643  		unix.SYS_MMAP,
   644  		arch.SyscallArgument{Value: uintptr(addr)},
   645  		arch.SyscallArgument{Value: uintptr(fr.Length())},
   646  		arch.SyscallArgument{Value: uintptr(at.Prot())},
   647  		arch.SyscallArgument{Value: uintptr(flags | unix.MAP_SHARED | unix.MAP_FIXED)},
   648  		arch.SyscallArgument{Value: uintptr(f.FD())},
   649  		arch.SyscallArgument{Value: uintptr(fr.Start)})
   650  	return err
   651  }
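
         // For illustration only (f and the addresses are hypothetical, not values
         // used by this file): mapping the first page of a backing file read/write
         // at a fixed stub address would look roughly like
         //
         //	err := s.MapFile(hostarch.Addr(0x10000), f,
         //		memmap.FileRange{Start: 0, End: hostarch.PageSize},
         //		hostarch.ReadWrite, false /* precommit */)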
   652  
   653  // Unmap implements platform.AddressSpace.Unmap.
   654  func (s *subprocess) Unmap(addr hostarch.Addr, length uint64) {
   655  	ar, ok := addr.ToRange(length)
   656  	if !ok {
   657  		panic(fmt.Sprintf("addr %#x + length %#x overflows", addr, length))
   658  	}
   659  	s.mu.Lock()
   660  	for c := range s.contexts {
   661  		c.mu.Lock()
   662  		if c.lastFaultSP == s && ar.Contains(c.lastFaultAddr) {
   663  			// Forget the last fault so that if c faults again, the fault isn't
   664  			// incorrectly reported as a write fault. If this is being called
   665  			// due to munmap() of the corresponding vma, handling of the second
   666  			// fault will fail anyway.
   667  			c.lastFaultSP = nil
   668  			delete(s.contexts, c)
   669  		}
   670  		c.mu.Unlock()
   671  	}
   672  	s.mu.Unlock()
   673  	_, err := s.syscall(
   674  		unix.SYS_MUNMAP,
   675  		arch.SyscallArgument{Value: uintptr(addr)},
   676  		arch.SyscallArgument{Value: uintptr(length)})
   677  	if err != nil {
   678  		// We never expect this to happen.
    679  		panic(fmt.Sprintf("munmap(%x, %x) failed: %v", addr, length, err))
   680  	}
   681  }
   682  
   683  // PreFork implements platform.AddressSpace.PreFork.
   684  func (s *subprocess) PreFork() {}
   685  
   686  // PostFork implements platform.AddressSpace.PostFork.
   687  func (s *subprocess) PostFork() {}