github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/systrap/subprocess.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package systrap

import (
	"fmt"
	"os"
	"runtime"
	"sync"
	"sync/atomic"

	"golang.org/x/sys/unix"
	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
	"github.com/nicocha30/gvisor-ligolo/pkg/log"
	"github.com/nicocha30/gvisor-ligolo/pkg/pool"
	"github.com/nicocha30/gvisor-ligolo/pkg/seccomp"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/pgalloc"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform/systrap/sysmsg"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform/systrap/usertrap"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/usage"
)

var (
	// globalPool tracks all subprocesses in various states: active or
	// available for reuse.
	globalPool = subprocessPool{}

	// maximumUserAddress is the largest possible user address.
	maximumUserAddress = linux.TaskSize

	// stubInitAddress is the initial attempt link address for the stub.
	stubInitAddress = linux.TaskSize

	// maxRandomOffsetOfStubAddress is the maximum offset for randomizing a
	// stub address. It is set to the default value of mm.mmap_rnd_bits.
	//
	// Note: Tools like ThreadSanitizer don't like when the memory layout
	// is changed significantly.
	maxRandomOffsetOfStubAddress = (linux.TaskSize >> 7) & ^(uintptr(hostarch.PageSize) - 1)

	// maxStubUserAddress is the largest possible user address for
	// processes running inside gVisor. It is fixed because
	// * we don't want to reveal a stub address.
	// * it has to be the same across checkpoint/restore.
	maxStubUserAddress = maximumUserAddress - maxRandomOffsetOfStubAddress
)
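
// To illustrate the arithmetic above (a worked example, not normative: the
// exact linux.TaskSize depends on the architecture): with a task size around
// 2^47 bytes on amd64, linux.TaskSize >> 7 is roughly 2^40 bytes (1 TiB), and
// masking with ^(PageSize-1) rounds the offset down to a page boundary. The
// stub link address is therefore randomized within at most ~1 TiB below the
// top of the address space.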

// Linux kernel errnos which "should never be seen by user programs", but will
// be revealed to ptrace syscall exit tracing.
//
// These constants are only used in subprocess.go.
const (
	ERESTARTSYS    = unix.Errno(512)
	ERESTARTNOINTR = unix.Errno(513)
	ERESTARTNOHAND = unix.Errno(514)
)
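
// A hedged sketch of how these errnos are consumed: syscallIgnoreInterrupt
// (below) restarts an injected system call whenever the kernel reports one of
// them, e.g.
//
//	rval, err := t.syscall(&regs)
//	switch err {
//	case ERESTARTSYS, ERESTARTNOINTR, ERESTARTNOHAND:
//		continue // interrupted by a signal; retry the call
//	default:
//		return rval, err
//	}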

// thread is a traced thread; it is a thread identifier.
//
// This is a convenience type for defining ptrace operations.
type thread struct {
	tgid int32
	tid  int32

	// sysmsgStackID is a stack ID in subprocess.sysmsgStackPool.
	sysmsgStackID uint64

	// initRegs are the initial registers for the first thread.
	//
	// These are used as the base register set for injected system calls.
	initRegs arch.Registers
}

// requestThread is used to request a new sysmsg thread. A thread identifier
// will be sent into the thread channel.
type requestThread struct {
	thread chan *thread
}

// requestStub is used to request a new stub process.
type requestStub struct {
	done chan *thread
}

// maxSysmsgThreads specifies the maximum number of system threads that a
// subprocess can create in context decoupled mode.
// TODO(b/268366549): Replace maxSystemThreads below.
var maxSysmsgThreads = runtime.GOMAXPROCS(0)

const (
	// maxSystemThreads specifies the maximum number of system threads that a
	// subprocess may create in order to process the contexts.
	maxSystemThreads = 4096
	// maxGuestContexts specifies the maximum number of task contexts that a
	// subprocess can handle.
	maxGuestContexts = 4095
	// invalidContextID specifies an invalid ID.
	invalidContextID uint32 = 0xfefefefe
	// invalidThreadID is used to indicate that a context is not being worked
	// on by any sysmsg thread.
	invalidThreadID uint32 = 0xfefefefe
)

// subprocess is a collection of threads being traced.
type subprocess struct {
	platform.NoAddressSpaceIO
	subprocessRefs

	// requests is used to signal creation of new threads.
	requests chan any

	// sysmsgInitRegs is used to reset sysemu regs.
	sysmsgInitRegs arch.Registers

	// mu protects the following fields.
	mu sync.Mutex

	// faultedContexts is the set of contexts for which it's possible that
	// context.lastFaultSP == this subprocess.
	faultedContexts map[*context]struct{}

	// sysmsgStackPool is a pool of available sysmsg stacks.
	sysmsgStackPool pool.Pool

	// threadContextPool is a pool of available sysmsg.ThreadContext IDs.
	threadContextPool pool.Pool

	// threadContextRegion defines the ThreadContext memory region start
	// within the sentry address space.
	threadContextRegion uintptr

	// memoryFile is used to allocate a sysmsg stack which is shared
	// between a stub process and the Sentry.
	memoryFile *pgalloc.MemoryFile

	// usertrap is the state of the usertrap table which contains syscall
	// trampolines.
	usertrap *usertrap.State

	syscallThreadMu sync.Mutex
	syscallThread   *syscallThread

	// sysmsgThreadsMu protects sysmsgThreads and numSysmsgThreads.
	sysmsgThreadsMu sync.Mutex
	// sysmsgThreads is a collection of all active sysmsg threads in the
	// subprocess.
	sysmsgThreads map[uint32]*sysmsgThread
	// numSysmsgThreads counts the number of active sysmsg threads; we use a
	// counter instead of using len(sysmsgThreads) because we need to synchronize
	// how many threads get created _before_ the creation happens.
	numSysmsgThreads int

	// contextQueue is a queue of all contexts that are ready to switch back to
	// user mode.
	contextQueue *contextQueue
}
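
// A hedged sketch of the round-trip on the requests channel (this mirrors
// newThread and createSysmsgThread below):
//
//	var r requestThread
//	r.thread = make(chan *thread)
//	s.requests <- r // served by handlePtraceSyscallRequest
//	t := <-r.thread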

func (s *subprocess) initSyscallThread(ptraceThread *thread) error {
	s.syscallThreadMu.Lock()
	defer s.syscallThreadMu.Unlock()

	id, ok := s.sysmsgStackPool.Get()
	if !ok {
		panic("unable to allocate a sysmsg stub thread")
	}

	ptraceThread.sysmsgStackID = id
	t := syscallThread{
		subproc: s,
		thread:  ptraceThread,
	}

	if err := t.init(); err != nil {
		panic(fmt.Sprintf("failed to create a syscall thread: %v", err))
	}
	s.syscallThread = &t

	s.syscallThread.detach()

	return nil
}

// handlePtraceSyscallRequest executes system calls that can't be run via
// syscallThread without using ptrace. Look at the description of syscallThread
// to get more details about its limitations.
func (s *subprocess) handlePtraceSyscallRequest(req any) {
	s.syscallThreadMu.Lock()
	defer s.syscallThreadMu.Unlock()
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	s.syscallThread.attach()
	defer s.syscallThread.detach()

	ptraceThread := s.syscallThread.thread

	switch r := req.(type) {
	case requestThread:
		t, err := ptraceThread.clone()
		if err != nil {
			// Should not happen: not recoverable.
			panic(fmt.Sprintf("error initializing thread: %v", err))
		}

		// Since the new thread was created with
		// clone(CLONE_PTRACE), it will begin execution with
		// SIGSTOP pending and with this thread as its tracer.
		// (Hopefully nobody tgkilled it with a signal <
		// SIGSTOP before the SIGSTOP was delivered, in which
		// case that signal would be delivered before SIGSTOP.)
		if sig := t.wait(stopped); sig != unix.SIGSTOP {
			panic(fmt.Sprintf("error waiting for new clone: expected SIGSTOP, got %v", sig))
		}

		id, ok := s.sysmsgStackPool.Get()
		if !ok {
			panic("unable to allocate a sysmsg stub thread")
		}
		t.sysmsgStackID = id

		if _, _, e := unix.RawSyscall(unix.SYS_TGKILL, uintptr(t.tgid), uintptr(t.tid), uintptr(unix.SIGSTOP)); e != 0 {
			panic(fmt.Sprintf("tgkill failed: %v", e))
		}

		// Detach the thread.
		t.detach()
		t.initRegs = ptraceThread.initRegs

		// Return the thread.
		r.thread <- t
	case requestStub:
		t, err := ptraceThread.createStub()
		if err != nil {
			panic(fmt.Sprintf("unable to create a stub process: %s", err))
		}
		r.done <- t
	}
}

// newSubprocess returns a usable subprocess.
//
// This will either be a newly created subprocess, or one from the global pool.
// The create function will be called in the former case, which is guaranteed
// to happen with the runtime thread locked.
func newSubprocess(create func() (*thread, error), memoryFile *pgalloc.MemoryFile) (*subprocess, error) {
	if sp := globalPool.fetchAvailable(); sp != nil {
		sp.subprocessRefs.InitRefs()
		sp.usertrap = usertrap.New()
		return sp, nil
	}

	// The following goroutine is responsible for creating the first traced
	// thread, and responding to requests to make additional threads in the
	// traced process. The requests channel is never closed: subprocesses
	// are returned to the globalPool by Release below and reused.
	requests := make(chan any)

	// Ready.
	sp := &subprocess{
		requests:          requests,
		faultedContexts:   make(map[*context]struct{}),
		sysmsgStackPool:   pool.Pool{Start: 0, Limit: maxSystemThreads},
		threadContextPool: pool.Pool{Start: 0, Limit: maxGuestContexts},
		memoryFile:        memoryFile,
		sysmsgThreads:     make(map[uint32]*sysmsgThread),
	}
	sp.subprocessRefs.InitRefs()
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Initialize the syscall thread.
	ptraceThread, err := create()
	if err != nil {
		return nil, err
	}
	sp.sysmsgInitRegs = ptraceThread.initRegs

	if err := sp.initSyscallThread(ptraceThread); err != nil {
		return nil, err
	}

	go func() { // S/R-SAFE: Platform-related.
		// Wait for requests to create threads.
		for req := range requests {
			sp.handlePtraceSyscallRequest(req)
		}

		// Requests should never be closed.
		panic("unreachable")
	}()

	sp.unmap()
	sp.usertrap = usertrap.New()
	sp.mapSharedRegions()
	sp.mapPrivateRegions()

	// Create the initial sysmsg thread.
	atomic.AddUint32(&sp.contextQueue.numThreadsToWakeup, 1)
	if err := sp.createSysmsgThread(); err != nil {
		return nil, err
	}
	sp.numSysmsgThreads++

	return sp, nil
}

// mapSharedRegions maps the shared regions that are used between the subprocess
// and ALL of the subsequently created sysmsg threads into both the sentry and
// the syscall thread.
//
// Should be called before any sysmsg threads are created.
// Initializes s.contextQueue and s.threadContextRegion.
func (s *subprocess) mapSharedRegions() {
	if s.contextQueue != nil || s.threadContextRegion != 0 {
		panic("contextQueue or threadContextRegion was already initialized")
	}

	opts := pgalloc.AllocOpts{
		Kind: usage.System,
		Dir:  pgalloc.TopDown,
	}

	// Map the context queue region into the sentry.
	contextQueueFR, contextQueue := mmapContextQueueForSentry(s.memoryFile, opts)
	contextQueue.init()

	// Map the context queue region into the syscall thread.
	_, err := s.syscallThread.syscall(
		unix.SYS_MMAP,
		arch.SyscallArgument{Value: uintptr(stubContextQueueRegion)},
		arch.SyscallArgument{Value: uintptr(contextQueueFR.Length())},
		arch.SyscallArgument{Value: uintptr(unix.PROT_READ | unix.PROT_WRITE)},
		arch.SyscallArgument{Value: uintptr(unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED)},
		arch.SyscallArgument{Value: uintptr(s.memoryFile.FD())},
		arch.SyscallArgument{Value: uintptr(contextQueueFR.Start)})
	if err != nil {
		panic(fmt.Sprintf("failed to mmap context queue region into syscall thread: %v", err))
	}

	s.contextQueue = contextQueue

	// Map the thread context region into the sentry.
	threadContextFR, err := s.memoryFile.Allocate(uint64(stubContextRegionLen), opts)
	if err != nil {
		panic(fmt.Sprintf("failed to allocate a new subprocess context memory region: %v", err))
	}
	sentryThreadContextRegionAddr, _, errno := unix.RawSyscall6(
		unix.SYS_MMAP,
		0,
		uintptr(threadContextFR.Length()),
		unix.PROT_WRITE|unix.PROT_READ,
		unix.MAP_SHARED|unix.MAP_FILE,
		uintptr(s.memoryFile.FD()), uintptr(threadContextFR.Start))
	if errno != 0 {
		panic(fmt.Sprintf("mmap failed for subprocess context memory region: %v", errno))
	}

	// Map the thread context region into the syscall thread.
	if _, err := s.syscallThread.syscall(
		unix.SYS_MMAP,
		arch.SyscallArgument{Value: uintptr(stubContextRegion)},
		arch.SyscallArgument{Value: uintptr(threadContextFR.Length())},
		arch.SyscallArgument{Value: uintptr(unix.PROT_READ | unix.PROT_WRITE)},
		arch.SyscallArgument{Value: uintptr(unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED)},
		arch.SyscallArgument{Value: uintptr(s.memoryFile.FD())},
		arch.SyscallArgument{Value: uintptr(threadContextFR.Start)}); err != nil {
		panic(fmt.Sprintf("failed to mmap thread context region into syscall thread: %v", err))
	}

	s.threadContextRegion = sentryThreadContextRegionAddr
}
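
// Net effect of mapSharedRegions, summarized: the same memoryFile pages back
// two mappings each, one in the sentry address space and one at a fixed stub
// address in the subprocess, so the context queue and the thread contexts can
// be shared without copying.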

func (s *subprocess) mapPrivateRegions() {
	_, err := s.syscallThread.syscall(
		unix.SYS_MMAP,
		arch.SyscallArgument{Value: uintptr(stubSpinningThreadQueueAddr)},
		arch.SyscallArgument{Value: uintptr(sysmsg.SpinningQueueMemSize)},
		arch.SyscallArgument{Value: uintptr(unix.PROT_READ | unix.PROT_WRITE)},
		arch.SyscallArgument{Value: uintptr(unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED)},
		arch.SyscallArgument{Value: 0},
		arch.SyscallArgument{Value: 0})
	if err != nil {
		panic(fmt.Sprintf("failed to mmap spinning queue region into syscall thread: %v", err))
	}
}

// unmap unmaps non-stub regions of the process.
//
// This will panic on failure (which should never happen).
func (s *subprocess) unmap() {
	s.Unmap(0, uint64(stubStart))
	if maximumUserAddress != stubEnd {
		s.Unmap(hostarch.Addr(stubEnd), uint64(maximumUserAddress-stubEnd))
	}
}

// Release kills the subprocess.
//
// Just kidding! We can't safely coordinate the detaching of all the
// tracees (since the tracers are random runtime threads, and the process
// won't exit until tracers have been notified).
//
// Therefore we simply unmap everything in the subprocess and return it to the
// globalPool. This has the added benefit of reducing creation time for new
// subprocesses.
func (s *subprocess) Release() {
	s.unmap()
	s.DecRef(s.release)
}

// release returns the subprocess to the global pool.
func (s *subprocess) release() {
	globalPool.markAvailable(s)
}

// newThread creates a new traced thread.
//
// Precondition: the OS thread must be locked.
func (s *subprocess) newThread() *thread {
	// Ask the first thread to create a new one.
	var r requestThread
	r.thread = make(chan *thread)
	s.requests <- r
	t := <-r.thread

	// Attach to the new thread from the current OS thread.
	t.attach()

	// Return the new thread, which is now bound.
	return t
}

// attach attaches to the thread.
func (t *thread) attach() {
	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_ATTACH, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
		panic(fmt.Sprintf("unable to attach: %v", errno))
	}

	// PTRACE_ATTACH sends SIGSTOP, and wakes the tracee if it was already
	// stopped from the SIGSTOP queued by CLONE_PTRACE (see
	// handlePtraceSyscallRequest), so we always expect to see
	// signal-delivery-stop with SIGSTOP.
	if sig := t.wait(stopped); sig != unix.SIGSTOP {
		panic(fmt.Sprintf("wait failed: expected SIGSTOP, got %v", sig))
	}

	// Initialize options.
	t.init()
}

func (t *thread) grabInitRegs() {
	// Grab registers.
	//
	// Note that we adjust the current register RIP value to be just before
	// the current system call executed. This depends on the definition of
	// the stub itself.
	if err := t.getRegs(&t.initRegs); err != nil {
		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
	}
	t.adjustInitRegsRip()
	t.initRegs.SetStackPointer(0)
}

// detach detaches from the thread.
//
// Because the SIGSTOP is not suppressed, the thread will enter group-stop.
func (t *thread) detach() {
	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(unix.SIGSTOP), 0, 0); errno != 0 {
		panic(fmt.Sprintf("can't detach new clone: %v", errno))
	}
}

// waitOutcome is used for wait below.
type waitOutcome int

const (
	// stopped indicates that the process was stopped.
	stopped waitOutcome = iota

	// killed indicates that the process was killed.
	killed
)

func (t *thread) Debugf(format string, v ...any) {
	prefix := fmt.Sprintf("%8d:", t.tid)
	log.DebugfAtDepth(1, prefix+format, v...)
}

func (t *thread) dumpAndPanic(message string) {
	var regs arch.Registers
	message += "\n"
	if err := t.getRegs(&regs); err == nil {
		message += dumpRegs(&regs)
	} else {
		log.Warningf("unable to get registers: %v", err)
	}
	message += fmt.Sprintf("stubStart\t = %016x\n", stubStart)
	panic(message)
}

func (t *thread) dumpRegs(message string) {
	var regs arch.Registers
	message += "\n"
	if err := t.getRegs(&regs); err == nil {
		message += dumpRegs(&regs)
	} else {
		log.Warningf("unable to get registers: %v", err)
	}
	log.Infof("%s", message)
}

func (t *thread) unexpectedStubExit() {
	msg, err := t.getEventMessage()
	status := unix.WaitStatus(msg)
	if status.Signaled() && status.Signal() == unix.SIGKILL {
		// SIGKILL can only be sent by a user or the OOM-killer. In both
		// cases, we don't need to panic; there is no reason to think
		// that something is wrong in gVisor.
		log.Warningf("The ptrace stub process %v has been killed by SIGKILL.", t.tgid)
		pid := os.Getpid()
		unix.Tgkill(pid, pid, unix.Signal(unix.SIGKILL))
	}
	t.dumpAndPanic(fmt.Sprintf("wait failed: the process %d:%d exited: %x (err %v)", t.tgid, t.tid, msg, err))
}

// wait waits for a stop event.
//
// Precondition: outcome is a valid waitOutcome.
func (t *thread) wait(outcome waitOutcome) unix.Signal {
	var status unix.WaitStatus

	for {
		r, err := unix.Wait4(int(t.tid), &status, unix.WALL|unix.WUNTRACED, nil)
		if err == unix.EINTR || err == unix.EAGAIN {
			// Wait was interrupted; wait again.
			continue
		} else if err != nil {
			panic(fmt.Sprintf("ptrace wait failed: %v", err))
		}
		if int(r) != int(t.tid) {
			panic(fmt.Sprintf("ptrace wait returned %v, expected %v", r, t.tid))
		}
		switch outcome {
		case stopped:
			if !status.Stopped() {
				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted stopped", status))
			}
			stopSig := status.StopSignal()
			if stopSig == 0 {
				continue // Spurious stop.
			}
			if stopSig == unix.SIGTRAP {
				if status.TrapCause() == unix.PTRACE_EVENT_EXIT {
					t.unexpectedStubExit()
				}
				// Re-encode the trap cause the way it's expected.
				return stopSig | unix.Signal(status.TrapCause()<<8)
			}
			// Not a trap signal.
			return stopSig
		case killed:
			if !status.Exited() && !status.Signaled() {
				t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted exited", status))
			}
			return unix.Signal(status.ExitStatus())
		default:
			// Should not happen.
			t.dumpAndPanic(fmt.Sprintf("unknown outcome: %v", outcome))
		}
	}
}
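
// A hedged sketch of decoding the value returned by wait for SIGTRAP stops
// (the re-encoding above packs the ptrace trap cause into the high bits):
//
//	sig := t.wait(stopped)
//	if sig&0xff == unix.SIGTRAP {
//		cause := int(sig) >> 8 // e.g. unix.PTRACE_EVENT_EXIT
//		_ = cause
//	}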

// destroy kills the thread.
//
// Note that this should not be used in the general case; the death of threads
// will typically cause the death of the parent. This is a utility method for
// manually created threads.
func (t *thread) destroy() {
	t.detach()
	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(unix.SIGKILL))
	t.wait(killed)
}

// init initializes trace options.
func (t *thread) init() {
	// Set the TRACESYSGOOD option to differentiate real SIGTRAPs, and set
	// PTRACE_O_EXITKILL to ensure that an unexpected exit of the sentry
	// immediately kills the associated stubs.
	_, _, errno := unix.RawSyscall6(
		unix.SYS_PTRACE,
		unix.PTRACE_SETOPTIONS,
		uintptr(t.tid),
		0,
		unix.PTRACE_O_TRACESYSGOOD|unix.PTRACE_O_TRACEEXIT|unix.PTRACE_O_EXITKILL,
		0, 0)
	if errno != 0 {
		panic(fmt.Sprintf("ptrace set options failed: %v", errno))
	}
}

// syscall executes a system call cycle in the traced context.
//
// This is _not_ for use by application system calls, rather it is for use when
// a system call must be injected into the remote context (e.g. mmap, munmap).
// Note that clones are handled separately.
func (t *thread) syscall(regs *arch.Registers) (uintptr, error) {
	// Set registers.
	if err := t.setRegs(regs); err != nil {
		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
	}

	for {
		// Execute the syscall instruction. The task stops on the trap
		// instruction placed right after the syscall instruction.
		if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_CONT, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
			panic(fmt.Sprintf("ptrace cont failed: %v", errno))
		}

		sig := t.wait(stopped)
		if sig == unix.SIGTRAP {
			// Stopped at the trap instruction; the syscall has
			// completed.
			break
		}
		// Some other signal caused a thread stop; ignore.
		if sig != unix.SIGSTOP && sig != unix.SIGCHLD {
			log.Warningf("The thread %d:%d has been interrupted by %d", t.tgid, t.tid, sig)
		}
	}

	// Grab registers.
	if err := t.getRegs(regs); err != nil {
		panic(fmt.Sprintf("ptrace get regs failed: %v", err))
	}
	return syscallReturnValue(regs)
}
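
// A hedged usage sketch (createSyscallRegs and the thread's initRegs are
// defined elsewhere in this package): injecting munmap into the stub.
//
//	regs := createSyscallRegs(&t.initRegs, unix.SYS_MUNMAP,
//		arch.SyscallArgument{Value: addr},
//		arch.SyscallArgument{Value: length})
//	rval, err := t.syscall(&regs)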

// syscallIgnoreInterrupt ignores interrupts on the system call thread and
// restarts the syscall if the kernel indicates that should happen.
func (t *thread) syscallIgnoreInterrupt(
	initRegs *arch.Registers,
	sysno uintptr,
	args ...arch.SyscallArgument) (uintptr, error) {
	for {
		regs := createSyscallRegs(initRegs, sysno, args...)
		rval, err := t.syscall(&regs)
		switch err {
		case ERESTARTSYS, ERESTARTNOINTR, ERESTARTNOHAND:
			continue
		default:
			return rval, err
		}
	}
}

// NotifyInterrupt implements interrupt.Receiver.NotifyInterrupt.
func (t *thread) NotifyInterrupt() {
	unix.Tgkill(int(t.tgid), int(t.tid), unix.Signal(platform.SignalInterrupt))
}

func (s *subprocess) incAwakeContexts() {
	nr := atomic.AddUint32(&s.contextQueue.numAwakeContexts, 1)
	if nr > uint32(maxSysmsgThreads) {
		return
	}
	nr = nrMaxAwakeStubThreads.Add(1)
	if nr > fastPathContextLimit {
		dispatcher.disableStubFastPath()
	}
}

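// Note on the arithmetic below: atomic.AddUint32 has no Sub variant, so the
// counters are decremented by adding ^uint32(0), the two's-complement
// encoding of -1.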
func (s *subprocess) decAwakeContexts() {
	nr := atomic.AddUint32(&s.contextQueue.numAwakeContexts, ^uint32(0))
	if nr >= uint32(maxSysmsgThreads) {
		return
	}
	nrMaxAwakeStubThreads.Add(^uint32(0))
}

// switchToApp is called from the main SwitchToApp entrypoint.
//
// This function returns true on a system call, false on a signal.
// The second return value is true if the syscall instruction can be replaced
// with a function call.
func (s *subprocess) switchToApp(c *context, ac *arch.Context64) (isSyscall bool, shouldPatchSyscall bool, err error) {
	// Reset necessary registers.
	regs := &ac.StateData().Regs
	s.resetSysemuRegs(regs)
	ctx := c.sharedContext
	ctx.shared.Regs = regs.PtraceRegs
	restoreArchSpecificState(ctx.shared, ac)

	// Check for interrupts, and ensure that future interrupts signal the context.
	if !c.interrupt.Enable(c.sharedContext) {
		// Pending interrupt; simulate.
		ctx.clearInterrupt()
		c.signalInfo = linux.SignalInfo{Signo: int32(platform.SignalInterrupt)}
		return false, false, nil
	}
	defer func() {
		ctx.clearInterrupt()
		c.interrupt.Disable()
	}()

	restoreFPState(ctx, c, ac)

	// Place the context onto the context queue.
	if ctx.sleeping {
		ctx.sleeping = false
		s.incAwakeContexts()
	}
	stubFastPathEnabled := dispatcher.stubFastPathEnabled()
	ctx.setState(sysmsg.ContextStateNone)
	s.contextQueue.add(ctx, stubFastPathEnabled)
	s.waitOnState(ctx, stubFastPathEnabled)

	// Check if there's been an error.
	threadID := ctx.threadID()
	if threadID != invalidThreadID {
		if sysThread, ok := s.sysmsgThreads[threadID]; ok && sysThread.msg.Err != 0 {
			msg := sysThread.msg
			panic(fmt.Sprintf("stub thread %d failed: err 0x%x line %d: %s", sysThread.thread.tid, msg.Err, msg.Line, msg))
		}
		log.Warningf("systrap: found unexpected ThreadContext.ThreadID field, expected %d found %d", invalidThreadID, threadID)
	}

	// Copy register state locally.
	regs.PtraceRegs = ctx.shared.Regs
	retrieveArchSpecificState(ctx.shared, ac)
	c.needToPullFullState = true
	// We have a signal. We verify, however, that the signal was either
	// delivered from the kernel or from this process; we don't respect
	// other signals.
	c.signalInfo = ctx.shared.SignalInfo
	ctxState := ctx.state()
	if ctxState == sysmsg.ContextStateSyscallCanBePatched {
		ctxState = sysmsg.ContextStateSyscall
		shouldPatchSyscall = true
	}

	if ctxState == sysmsg.ContextStateSyscall || ctxState == sysmsg.ContextStateSyscallTrap {
		if maybePatchSignalInfo(regs, &c.signalInfo) {
			return false, false, nil
		}
		updateSyscallRegs(regs)
		return true, shouldPatchSyscall, nil
	} else if ctxState != sysmsg.ContextStateFault {
		panic(fmt.Sprintf("unknown context state: %v", ctxState))
	}

	return false, false, nil
}

func (s *subprocess) waitOnState(ctx *sharedContext, stubFastPathEnabled bool) {
	ctx.kicked = false
	slowPath := false
	start := cputicks()
	ctx.startWaitingTS = start
	if !stubFastPathEnabled || atomic.LoadUint32(&s.contextQueue.numActiveThreads) == 0 {
		ctx.kicked = s.kickSysmsgThread()
	}
	for curState := ctx.state(); curState == sysmsg.ContextStateNone; curState = ctx.state() {
		if !slowPath {
			events := dispatcher.waitFor(ctx)
			if events&sharedContextKicked != 0 {
				if ctx.kicked {
					continue
				}
				if ctx.isAcked() {
					ctx.kicked = true
					continue
				}
				s.kickSysmsgThread()
				ctx.kicked = true
				continue
			}
			if events&sharedContextSlowPath != 0 {
				ctx.disableSentryFastPath()
				slowPath = true
				continue
			}
		} else {
			// If the context already received a handshake then it knows it's being
			// worked on.
			if !ctx.kicked && !ctx.isAcked() {
				ctx.kicked = s.kickSysmsgThread()
			}

			ctx.sleepOnState(curState)
		}
	}

	ctx.resetAcked()
	ctx.enableSentryFastPath()
}

// canKickSysmsgThread returns true if a new thread can be kicked.
// The second return value is the expected number of threads after kicking a
// new one.
func (s *subprocess) canKickSysmsgThread() (bool, uint32) {
	// numActiveContexts and numActiveThreads can be changed from stub
	// threads that handle the contextQueue without any locks. The idea
	// here is that any stub thread that gets CPU time can make some
	// progress. In stub threads, we can use only spinlock-like
	// synchronizations, but they don't work well because a thread that
	// holds a lock can be preempted by another thread that is waiting for
	// the same lock.
	nrActiveThreads := atomic.LoadUint32(&s.contextQueue.numActiveThreads)
	nrThreadsToWakeup := atomic.LoadUint32(&s.contextQueue.numThreadsToWakeup)
	nrActiveContexts := atomic.LoadUint32(&s.contextQueue.numActiveContexts)

	nrActiveThreads += nrThreadsToWakeup + 1
	if nrActiveThreads > nrActiveContexts {
		// This can happen when one or more stub threads are
		// waiting for CPU time. The host probably has more
		// running tasks than the number of CPUs.
		return false, nrActiveThreads
	}
	return true, nrActiveThreads
}

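// kickSysmsgThread wakes up a sysmsg thread to pick up a queued context,
// creating a new stub thread if the subprocess is still below its limits. The
// lockless canKickSysmsgThread check is repeated under sysmsgThreadsMu
// because the counters can change concurrently (a double-checked pattern).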
func (s *subprocess) kickSysmsgThread() bool {
	kick, _ := s.canKickSysmsgThread()
	if !kick {
		return false
	}

	s.sysmsgThreadsMu.Lock()
	kick, nrThreads := s.canKickSysmsgThread()
	if !kick {
		s.sysmsgThreadsMu.Unlock()
		return false
	}
	atomic.AddUint32(&s.contextQueue.numThreadsToWakeup, 1)
	if s.numSysmsgThreads < maxSysmsgThreads && s.numSysmsgThreads < int(nrThreads) {
		s.numSysmsgThreads++
		s.sysmsgThreadsMu.Unlock()
		if err := s.createSysmsgThread(); err != nil {
			log.Warningf("Unable to create a new stub thread: %s", err)
			s.sysmsgThreadsMu.Lock()
			s.numSysmsgThreads--
			s.sysmsgThreadsMu.Unlock()
		}
	} else {
		s.sysmsgThreadsMu.Unlock()
	}
	s.contextQueue.wakeupSysmsgThread()

	return false
}

// syscall executes the given system call without handling interruptions.
func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintptr, error) {
	s.syscallThreadMu.Lock()
	defer s.syscallThreadMu.Unlock()

	return s.syscallThread.syscall(sysno, args...)
}

// MapFile implements platform.AddressSpace.MapFile.
func (s *subprocess) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
	var flags int
	if precommit {
		flags |= unix.MAP_POPULATE
	}
	_, err := s.syscall(
		unix.SYS_MMAP,
		arch.SyscallArgument{Value: uintptr(addr)},
		arch.SyscallArgument{Value: uintptr(fr.Length())},
		arch.SyscallArgument{Value: uintptr(at.Prot())},
		arch.SyscallArgument{Value: uintptr(flags | unix.MAP_SHARED | unix.MAP_FIXED)},
		arch.SyscallArgument{Value: uintptr(f.FD())},
		arch.SyscallArgument{Value: uintptr(fr.Start)})
	return err
}

// Unmap implements platform.AddressSpace.Unmap.
func (s *subprocess) Unmap(addr hostarch.Addr, length uint64) {
	ar, ok := addr.ToRange(length)
	if !ok {
		panic(fmt.Sprintf("addr %#x + length %#x overflows", addr, length))
	}
	s.mu.Lock()
	for c := range s.faultedContexts {
		c.mu.Lock()
		if c.lastFaultSP == s && ar.Contains(c.lastFaultAddr) {
			// Forget the last fault so that if c faults again, the fault isn't
			// incorrectly reported as a write fault. If this is being called
			// due to munmap() of the corresponding vma, handling of the second
			// fault will fail anyway.
			c.lastFaultSP = nil
			delete(s.faultedContexts, c)
		}
		c.mu.Unlock()
	}
	s.mu.Unlock()
	_, err := s.syscall(
		unix.SYS_MUNMAP,
		arch.SyscallArgument{Value: uintptr(addr)},
		arch.SyscallArgument{Value: uintptr(length)})
	if err != nil {
		// We never expect this to happen.
		panic(fmt.Sprintf("munmap(%x, %x) failed: %v", addr, length, err))
	}
}

func (s *subprocess) PullFullState(c *context, ac *arch.Context64) error {
	if !c.sharedContext.isActiveInSubprocess(s) {
		panic("attempted to PullFullState for a context that is not active in this subprocess")
	}
	saveFPState(c.sharedContext, ac)
	return nil
}

var sysmsgThreadPriority int

func initSysmsgThreadPriority() {
	prio, err := unix.Getpriority(unix.PRIO_PROCESS, 0)
	if err != nil {
		panic(fmt.Sprintf("unable to get current scheduling priority: %v", err))
	}
	// Sysmsg threads are executed with a priority one lower than the Sentry.
	sysmsgThreadPriority = 20 - prio + 1
}
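
// A worked example of the formula above (an illustration, assuming the raw
// getpriority(2) kernel encoding of 20-nice that unix.Getpriority returns):
// for a sentry running at nice 0, prio is 20, so sysmsgThreadPriority is
// 20 - 20 + 1 = 1, i.e. nice +1, one priority level below the Sentry.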

// createSysmsgThread creates a new sysmsg thread.
// The thread starts processing any available context in the context queue.
func (s *subprocess) createSysmsgThread() error {
	// Create a new seccomp process.
	var r requestThread
	r.thread = make(chan *thread)
	s.requests <- r
	p := <-r.thread

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	p.attach()

	// Skip SIGSTOP.
	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_CONT, uintptr(p.tid), 0, 0, 0, 0); errno != 0 {
		panic(fmt.Sprintf("ptrace cont failed: %v", errno))
	}
	sig := p.wait(stopped)
	if sig != unix.SIGSTOP {
		panic(fmt.Sprintf("error waiting for new clone: expected SIGSTOP, got %v", sig))
	}

	// Allocate a new stack for the BPF process.
	opts := pgalloc.AllocOpts{
		Kind: usage.System,
		Dir:  pgalloc.TopDown,
	}
	fr, err := s.memoryFile.Allocate(uint64(sysmsg.PerThreadSharedStackSize), opts)
	if err != nil {
		// TODO(b/144063246): Need to fail the clone system call.
		panic(fmt.Sprintf("failed to allocate a new stack: %v", err))
	}
	sysThread := &sysmsgThread{
		thread:     p,
		subproc:    s,
		stackRange: fr,
	}
	// Use the sysmsgStackID as a handle on this thread instead of host tid in
	// order to be able to reliably specify invalidThreadID.
	threadID := uint32(p.sysmsgStackID)

	// Map the stack into the sentry.
	sentryStackAddr, _, errno := unix.RawSyscall6(
		unix.SYS_MMAP,
		0,
		sysmsg.PerThreadSharedStackSize,
		unix.PROT_WRITE|unix.PROT_READ,
		unix.MAP_SHARED|unix.MAP_FILE,
		uintptr(s.memoryFile.FD()), uintptr(fr.Start))
	if errno != 0 {
		panic(fmt.Sprintf("mmap failed: %v", errno))
	}

	// Before installing the stub syscall filters, we need to call a few
	// system calls (e.g. sigaltstack, sigaction) which have in-memory
	// arguments. We need to prevent other stub threads from changing
	// these parameters, so let's map the future BPF stack as read-only
	// and fill syscall arguments from the Sentry.
	sysmsgStackAddr := sysThread.sysmsgPerThreadMemAddr() + sysmsg.PerThreadSharedStackOffset
	err = sysThread.mapStack(sysmsgStackAddr, true)
	if err != nil {
		panic(fmt.Sprintf("mmap failed: %v", err))
	}

	sysThread.init(sentryStackAddr, sysmsgStackAddr)

	// Map the stack into the BPF process.
	err = sysThread.mapStack(sysmsgStackAddr, false)
	if err != nil {
		s.memoryFile.DecRef(fr)
		panic(fmt.Sprintf("mmap failed: %v", err))
	}

	// Map the private stack into the BPF process.
	privateStackAddr := sysThread.sysmsgPerThreadMemAddr() + sysmsg.PerThreadPrivateStackOffset
	err = sysThread.mapPrivateStack(privateStackAddr, sysmsg.PerThreadPrivateStackSize)
	if err != nil {
		s.memoryFile.DecRef(fr)
		panic(fmt.Sprintf("mmap failed: %v", err))
	}

	sysThread.setMsg(sysmsg.StackAddrToMsg(sentryStackAddr))
	sysThread.msg.Init(threadID)
	sysThread.msg.Self = uint64(sysmsgStackAddr + sysmsg.MsgOffsetFromSharedStack)
	sysThread.msg.SyshandlerStack = uint64(sysmsg.StackAddrToSyshandlerStack(sysThread.sysmsgPerThreadMemAddr()))
	sysThread.msg.Syshandler = uint64(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_syshandler))

	sysThread.msg.State.Set(sysmsg.ThreadStateInitializing)

	if err := unix.Setpriority(unix.PRIO_PROCESS, int(p.tid), sysmsgThreadPriority); err != nil {
		log.Warningf("Unable to change priority of a stub thread: %s", err)
	}

	// Install the pre-compiled seccomp rules for the BPF process.
	_, err = p.syscallIgnoreInterrupt(&p.initRegs, unix.SYS_PRCTL,
		arch.SyscallArgument{Value: uintptr(linux.PR_SET_NO_NEW_PRIVS)},
		arch.SyscallArgument{Value: uintptr(1)},
		arch.SyscallArgument{Value: uintptr(0)},
		arch.SyscallArgument{Value: uintptr(0)},
		arch.SyscallArgument{Value: uintptr(0)},
		arch.SyscallArgument{Value: uintptr(0)})
	if err != nil {
		panic(fmt.Sprintf("prctl(PR_SET_NO_NEW_PRIVS) failed: %v", err))
	}

	_, err = p.syscallIgnoreInterrupt(&p.initRegs, seccomp.SYS_SECCOMP,
		arch.SyscallArgument{Value: uintptr(linux.SECCOMP_SET_MODE_FILTER)},
		arch.SyscallArgument{Value: uintptr(0)},
		arch.SyscallArgument{Value: stubSysmsgRules})
	if err != nil {
		panic(fmt.Sprintf("seccomp failed: %v", err))
	}

	// Prepare to start the BPF process.
	tregs := &arch.Registers{}
	s.resetSysemuRegs(tregs)
	setArchSpecificRegs(sysThread, tregs)
	if err := p.setRegs(tregs); err != nil {
		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
	}
	archSpecificSysmsgThreadInit(sysThread)
	// Clear the pending SIGSTOP with SIGCONT.
	if _, _, e := unix.RawSyscall(unix.SYS_TGKILL, uintptr(p.tgid), uintptr(p.tid), uintptr(unix.SIGCONT)); e != 0 {
		panic(fmt.Sprintf("tgkill failed: %v", e))
	}
	// Resume the BPF process.
	if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_DETACH, uintptr(p.tid), 0, 0, 0, 0); errno != 0 {
		panic(fmt.Sprintf("can't detach new clone: %v", errno))
	}

	s.sysmsgThreadsMu.Lock()
	s.sysmsgThreads[threadID] = sysThread
	s.sysmsgThreadsMu.Unlock()

	return nil
}

// PreFork implements platform.AddressSpace.PreFork.
// We need to take the usertrap lock to be sure that fork() will not be in the
// middle of applying a binary patch.
func (s *subprocess) PreFork() {
	s.usertrap.PreFork()
}

// PostFork implements platform.AddressSpace.PostFork.
func (s *subprocess) PostFork() {
	s.usertrap.PostFork() // +checklocksforce: PreFork acquires, above.
}

// activateContext activates the context in this subprocess.
// This is a no-op if the context is already active within the subprocess; if
// not, the context is deactivated from its last subprocess and a new shared
// context is acquired here.
func (s *subprocess) activateContext(c *context) error {
	if !c.sharedContext.isActiveInSubprocess(s) {
		c.sharedContext.release()
		c.sharedContext = nil

		shared, err := s.getSharedContext()
		if err != nil {
			return err
		}
		c.sharedContext = shared
	}
	return nil
}