github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/ptrace.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

import (
	"fmt"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/marshal/primitive"
	"github.com/SagerNet/gvisor/pkg/sentry/mm"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

// ptraceOptions are the subset of options controlling a task's ptrace behavior
// that are set by ptrace(PTRACE_SETOPTIONS).
//
// +stateify savable
type ptraceOptions struct {
	// ExitKill is true if the tracee should be sent SIGKILL when the tracer
	// exits.
	ExitKill bool

	// If SysGood is true, set bit 7 in the signal number for
	// syscall-entry-stop and syscall-exit-stop traps delivered to this task's
	// tracer.
	SysGood bool

	// TraceClone is true if the tracer wants to receive PTRACE_EVENT_CLONE
	// events.
	TraceClone bool

	// TraceExec is true if the tracer wants to receive PTRACE_EVENT_EXEC
	// events.
	TraceExec bool

	// TraceExit is true if the tracer wants to receive PTRACE_EVENT_EXIT
	// events.
	TraceExit bool

	// TraceFork is true if the tracer wants to receive PTRACE_EVENT_FORK
	// events.
	TraceFork bool

	// TraceSeccomp is true if the tracer wants to receive PTRACE_EVENT_SECCOMP
	// events.
	TraceSeccomp bool

	// TraceVfork is true if the tracer wants to receive PTRACE_EVENT_VFORK
	// events.
	TraceVfork bool

	// TraceVforkDone is true if the tracer wants to receive
	// PTRACE_EVENT_VFORK_DONE events.
	TraceVforkDone bool
}
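
// Each field of ptraceOptions corresponds to one PTRACE_O_* flag; the mapping
// is applied in ptraceSetOptionsLocked below. For instance, a tracee whose
// tracer passed PTRACE_O_TRACESYSGOOD|PTRACE_O_EXITKILL ends up with
// ptraceOptions{SysGood: true, ExitKill: true} and all other fields false.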

// ptraceSyscallMode controls the behavior of a ptraced task at syscall entry
// and exit.
type ptraceSyscallMode int

const (
	// ptraceSyscallNone indicates that the task has never ptrace-stopped, or
	// that it was resumed from its last ptrace-stop by PTRACE_CONT or
	// PTRACE_DETACH. The task's syscalls will not be intercepted.
	ptraceSyscallNone ptraceSyscallMode = iota

	// ptraceSyscallIntercept indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSCALL. The next time the task enters or exits a
	// syscall, a ptrace-stop will occur.
	ptraceSyscallIntercept

	// ptraceSyscallEmu indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSEMU or PTRACE_SYSEMU_SINGLESTEP. The next time
	// the task enters a syscall, the syscall will be skipped, and a
	// ptrace-stop will occur.
	ptraceSyscallEmu
)
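
// The mode is set by the resuming commands in Task.Ptrace via ptraceUnstop:
// PTRACE_CONT and PTRACE_SINGLESTEP select ptraceSyscallNone, PTRACE_SYSCALL
// selects ptraceSyscallIntercept, and PTRACE_SYSEMU[_SINGLESTEP] selects
// ptraceSyscallEmu. ptraceSyscallEnter then dispatches on the mode at the
// next syscall entry.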

// CanTrace checks that t is permitted to access target's state, as defined by
// ptrace(2), subsection "Ptrace access mode checking". If attach is true, it
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
//
// In Linux, ptrace access restrictions may be configured by LSMs. While we do
// not support LSMs, we do add additional restrictions based on the commoncap
// and YAMA LSMs.
//
// TODO(github.com/SagerNet/issue/212): The result of CanTrace is immediately stale (e.g., a
// racing setuid(2) may change traceability). This may pose a risk when a task
// changes from traceable to not traceable. This is only problematic across
// execve, where privileges may increase.
//
// We currently do not implement privileged executables (set-user/group-ID bits
// and file capabilities), so that case is not reachable.
func (t *Task) CanTrace(target *Task, attach bool) bool {
	// "If the calling thread and the target thread are in the same thread
	// group, access is always allowed." - ptrace(2)
	//
	// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
	// should not deny sub-threads", first released in Linux 3.12), the rule
	// only applies if t and target are the same task. But, as that commit
	// message puts it, "[any] security check is pointless when the tasks share
	// the same ->mm."
	if t.tg == target.tg {
		return true
	}

	if !t.canTraceStandard(target, attach) {
		return false
	}

	// YAMA only supported for vfs2.
	if !VFS2Enabled {
		return true
	}

	if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if !t.canTraceYAMALocked(target) {
			return false
		}
	}
	return true
}

// canTraceLocked is the same as CanTrace, except the caller must already hold
// the TaskSet mutex (for reading or writing).
func (t *Task) canTraceLocked(target *Task, attach bool) bool {
	if t.tg == target.tg {
		return true
	}

	if !t.canTraceStandard(target, attach) {
		return false
	}

	// YAMA only supported for vfs2.
	if !VFS2Enabled {
		return true
	}

	if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
		if !t.canTraceYAMALocked(target) {
			return false
		}
	}
	return true
}

// canTraceStandard performs standard ptrace access checks as defined by
// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
// implementation of the security_ptrace_access_check() interface, which is
// always invoked.
func (t *Task) canTraceStandard(target *Task, attach bool) bool {
	// """
	// TODO(github.com/SagerNet/issue/260): 1. If the access mode specifies
	// PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
	//
	// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
	// caller's real UID and GID for the checks in the next step. (Most APIs
	// that check the caller's UID and GID use the effective IDs. For
	// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
	// instead.)
	//
	// 2. Deny access if neither of the following is true:
	//
	// - The real, effective, and saved-set user IDs of the target match the
	// caller's user ID, *and* the real, effective, and saved-set group IDs of
	// the target match the caller's group ID.
	//
	// - The caller has the CAP_SYS_PTRACE capability in the user namespace of
	// the target.
	//
	// 3. Deny access if the target process "dumpable" attribute has a value
	// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
	// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
	// the user namespace of the target process.
	//
	// 4. The commoncap LSM performs the following steps:
	//
	// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
	// caller's effective capability set; otherwise (the access mode specifies
	// PTRACE_MODE_REALCREDS, so) use the caller's permitted capability set.
	//
	// b) Deny access if neither of the following is true:
	//
	// - The caller and the target process are in the same user namespace, and
	// the caller's capabilities are a proper superset of the target process's
	// permitted capabilities.
	//
	// - The caller has the CAP_SYS_PTRACE capability in the target process's
	// user namespace.
	//
	// Note that the commoncap LSM does not distinguish between
	// PTRACE_MODE_READ and PTRACE_MODE_ATTACH. (ED: From earlier in this
	// section: "the commoncap LSM ... is always invoked".)
	// """
	callerCreds := t.Credentials()
	targetCreds := target.Credentials()
	if callerCreds.HasCapabilityIn(linux.CAP_SYS_PTRACE, targetCreds.UserNamespace) {
		return true
	}
	if cuid := callerCreds.RealKUID; cuid != targetCreds.RealKUID || cuid != targetCreds.EffectiveKUID || cuid != targetCreds.SavedKUID {
		return false
	}
	if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID {
		return false
	}
	var targetMM *mm.MemoryManager
	target.WithMuLocked(func(t *Task) {
		targetMM = t.MemoryManager()
	})
	if targetMM != nil && targetMM.Dumpability() != mm.UserDumpable {
		return false
	}
	if callerCreds.UserNamespace != targetCreds.UserNamespace {
		return false
	}
	if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 {
		return false
	}
	return true
}
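
// A concrete reading of the checks above: a caller with real UID 1000 (and no
// CAP_SYS_PTRACE in the target's user namespace) may trace a target only if
// the target's real, effective, and saved-set UIDs are all 1000, the
// corresponding GIDs all match the caller's real GID, the target is dumpable,
// both share a user namespace, and the target holds no permitted capability
// that the caller lacks (the targetCreds.PermittedCaps &^
// callerCreds.PermittedCaps bitmask check).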

// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM
// implementation of the security_ptrace_access_check() interface, with YAMA
// configured to mode 1. This is a common default among various Linux
// distributions.
//
// It only permits the tracer to proceed if one of the following conditions is
// met:
//
// a) The tracer is already attached to the tracee.
//
// b) The target is a descendant of the tracer.
//
// c) The target has explicitly given permission to the tracer through the
// PR_SET_PTRACER prctl.
//
// d) The tracer has CAP_SYS_PTRACE.
//
// See security/yama/yama_lsm.c:yama_ptrace_access_check.
//
// Precondition: the TaskSet mutex must be locked (for reading or writing).
func (t *Task) canTraceYAMALocked(target *Task) bool {
	if tracer := target.Tracer(); tracer != nil {
		if tracer.tg == t.tg {
			return true
		}
	}
	if target.isYAMADescendantOfLocked(t) {
		return true
	}
	if target.hasYAMAExceptionForLocked(t) {
		return true
	}
	if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) {
		return true
	}
	return false
}

// isYAMADescendantOfLocked determines whether t is considered a descendant of
// ancestor for the purposes of YAMA permissions (specifically, whether t's
// thread group is descended from ancestor's).
//
// Precondition: the TaskSet mutex must be locked (for reading or writing).
func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
	walker := t
	for walker != nil {
		if walker.tg.leader == ancestor.tg.leader {
			return true
		}
		walker = walker.parent
	}
	return false
}
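
// Note that the walk above compares thread group leaders, not tasks, and
// follows parent pointers transitively: a target whose grandparent shares a
// thread group with the would-be tracer is a YAMA descendant, so under
// YAMA_SCOPE_RELATIONAL a shell may trace processes it (indirectly) spawned.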

// Precondition: the TaskSet mutex must be locked (for reading or writing).
func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
	allowed, ok := t.k.ptraceExceptions[t.tg.leader]
	if !ok {
		return false
	}
	return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
}
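
// A nil value in the ptraceExceptions map means "any tracer may attach": this
// is how SetYAMAException(nil) below encodes the equivalent of
// prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY). A non-nil value permits only that
// task's YAMA descendants, matching prctl(PR_SET_PTRACER, pid).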

// ClearYAMAException removes any YAMA exception with t as the tracee.
func (t *Task) ClearYAMAException() {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	tracee := t.tg.leader
	delete(t.k.ptraceExceptions, tracee)
}

// SetYAMAException creates a YAMA exception allowing all descendants of tracer
// to trace t. If tracer is nil, then any task is allowed to trace t.
//
// If there was an existing exception, it is overwritten with the new one.
func (t *Task) SetYAMAException(tracer *Task) {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()

	tracee := t.tg.leader
	tracee.ptraceYAMAExceptionAdded = true
	if tracer != nil {
		tracer.ptraceYAMAExceptionAdded = true
	}

	t.k.ptraceExceptions[tracee] = tracer
}

// Tracer returns t's ptrace Tracer.
func (t *Task) Tracer() *Task {
	return t.ptraceTracer.Load().(*Task)
}

// hasTracer returns true if t has a ptrace tracer attached.
func (t *Task) hasTracer() bool {
	// This isn't just inlined into callers so that if Task.Tracer() turns out
	// to be too expensive because of e.g. interface conversion, we can switch
	// to having a separate atomic flag more easily.
	return t.Tracer() != nil
}
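
// Tracer relies on a typed-nil convention: ptraceTracer is an atomic.Value
// that always holds a *Task, with "no tracer" stored as (*Task)(nil) (see
// forgetTracerLocked below, and presumably the same initialization at task
// creation), so the type assertion in Tracer never sees an empty Value. A
// minimal sketch of the same pattern, outside this package:
//
//	var v atomic.Value
//	v.Store((*Task)(nil)) // "detached"; the Value is never left empty.
//	tracer := v.Load().(*Task) // assertion succeeds even when detached.
//	attached := tracer != nil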

// ptraceStop is a TaskStop placed on tasks in a ptrace-stop.
//
// +stateify savable
type ptraceStop struct {
	// If frozen is true, the stopped task's tracer is currently operating on
	// it, so Task.Kill should not remove the stop.
	frozen bool

	// If listen is true, the stopped task's tracer invoked PTRACE_LISTEN, so
	// ptraceFreeze should fail.
	listen bool
}

// Killable implements TaskStop.Killable.
func (s *ptraceStop) Killable() bool {
	return !s.frozen
}

// beginPtraceStopLocked initiates an unfrozen ptrace-stop on t. If t has been
// killed, the stop is skipped, and beginPtraceStopLocked returns false.
//
// beginPtraceStopLocked does not signal t's tracer or wake it if it is
// waiting.
//
// Preconditions:
// * The TaskSet mutex must be locked.
// * The caller must be running on the task goroutine.
func (t *Task) beginPtraceStopLocked() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// This is analogous to Linux's kernel/signal.c:ptrace_stop() => ... =>
	// kernel/sched/core.c:__schedule() => signal_pending_state() check, which
	// is what prevents tasks from entering ptrace-stops after being killed.
	// Note that if t was SIGKILLed and beginPtraceStopLocked is being called
	// for PTRACE_EVENT_EXIT, the task will have dequeued the signal before
	// entering the exit path, so t.killedLocked() will no longer return true.
	// This is consistent with Linux: "Bugs: ... A SIGKILL signal may still
	// cause a PTRACE_EVENT_EXIT stop before actual signal death. This may be
	// changed in the future; SIGKILL is meant to always immediately kill tasks
	// even under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
	if t.killedLocked() {
		return false
	}
	t.beginInternalStopLocked(&ptraceStop{})
	return true
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceTrapLocked(code int32) {
	// This is unconditional in ptrace_stop().
	t.tg.signalHandlers.mu.Lock()
	t.trapStopPending = false
	t.tg.signalHandlers.mu.Unlock()
	t.ptraceCode = code
	t.ptraceSiginfo = &linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  code,
	}
	t.ptraceSiginfo.SetPID(int32(t.tg.pidns.tids[t]))
	t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
	if t.beginPtraceStopLocked() {
		tracer := t.Tracer()
		tracer.signalStop(t, linux.CLD_TRAPPED, int32(linux.SIGTRAP))
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
}

// ptraceFreeze checks if t is in a ptraceStop. If so, it freezes the
// ptraceStop, temporarily preventing it from being removed by a concurrent
// Task.Kill, and returns true. Otherwise it returns false.
//
// Preconditions:
// * The TaskSet mutex must be locked.
// * The caller must be running on the task goroutine of t's tracer.
func (t *Task) ptraceFreeze() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	if t.stop == nil {
		return false
	}
	s, ok := t.stop.(*ptraceStop)
	if !ok {
		return false
	}
	if s.listen {
		return false
	}
	s.frozen = true
	return true
}

// ptraceUnfreeze ends the effect of a previous successful call to
// ptraceFreeze.
//
// Preconditions: t must be in a frozen ptraceStop.
func (t *Task) ptraceUnfreeze() {
	// t.tg.signalHandlers is stable because t is in a frozen ptrace-stop,
	// preventing its thread group from completing execve.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.ptraceUnfreezeLocked()
}

// Preconditions:
// * t must be in a frozen ptraceStop.
// * t's signal mutex must be locked.
func (t *Task) ptraceUnfreezeLocked() {
	// Do this even if the task has been killed to ensure a panic if t.stop is
	// nil or not a ptraceStop.
	t.stop.(*ptraceStop).frozen = false
	if t.killedLocked() {
		t.endInternalStopLocked()
	}
}

// ptraceUnstop implements ptrace request PTRACE_CONT, PTRACE_SYSCALL,
// PTRACE_SINGLESTEP, PTRACE_SYSEMU, or PTRACE_SYSEMU_SINGLESTEP depending on
// mode and singlestep.
//
// Preconditions: t must be in a frozen ptrace stop.
//
// Postconditions: If ptraceUnstop returns nil, t will no longer be in a ptrace
// stop.
func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return syserror.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	t.ptraceCode = int32(sig)
	t.ptraceSyscallMode = mode
	t.ptraceSinglestep = singlestep
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.endInternalStopLocked()
	return nil
}
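
// The freeze/unfreeze pair brackets every tracer-side operation on a stopped
// tracee: Task.Ptrace below calls ptraceFreeze before touching the target,
// then either ends the stop via ptraceUnstop (the resuming commands) or
// releases it via ptraceUnfreeze (everything else). While frozen, the stop is
// not Killable, so a concurrent Task.Kill cannot remove the tracee's stop out
// from under the tracer mid-operation.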

func (t *Task) ptraceTraceme() error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if t.hasTracer() {
		return linuxerr.EPERM
	}
	if t.parent == nil {
		// In Linux, only init can lack a parent, and init is assumed never to
		// invoke PTRACE_TRACEME. In the sentry, TGID 1 is an arbitrary user
		// application that may invoke PTRACE_TRACEME; having no parent can
		// also occur if all tasks in the parent thread group have exited and
		// t failed to find a living thread group to reparent to. The former
		// case is treated as if TGID 1 has an exited parent in an invisible
		// ancestor PID namespace that is an owner of the root user namespace
		// (and consequently has CAP_SYS_PTRACE), and the latter case is a
		// special form of the exited parent case below. In either case,
		// returning nil here is correct.
		return nil
	}
	if !t.parent.canTraceLocked(t, true) {
		return linuxerr.EPERM
	}
	if t.parent.exitState != TaskExitNone {
		// Fail silently, as if we were successfully attached but then
		// immediately detached. This is consistent with Linux.
		return nil
	}
	t.ptraceTracer.Store(t.parent)
	t.parent.ptraceTracees[t] = struct{}{}
	return nil
}

// ptraceAttach implements ptrace(PTRACE_ATTACH, target) if seize is false, and
// ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller.
func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
	if t.tg == target.tg {
		return linuxerr.EPERM
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if !t.canTraceLocked(target, true) {
		return linuxerr.EPERM
	}
	if target.hasTracer() {
		return linuxerr.EPERM
	}
	// Attaching to zombies and dead tasks is not permitted; the exit
	// notification logic relies on this. Linux allows attaching to PF_EXITING
	// tasks, though.
	if target.exitState >= TaskExitZombie {
		return linuxerr.EPERM
	}
	if seize {
		if err := target.ptraceSetOptionsLocked(opts); err != nil {
			return syserror.EIO
		}
	}
	target.ptraceTracer.Store(t)
	t.ptraceTracees[target] = struct{}{}
	target.ptraceSeized = seize
	target.tg.signalHandlers.mu.Lock()
	// "Unlike PTRACE_ATTACH, PTRACE_SEIZE does not stop the process." -
	// ptrace(2)
	if !seize {
		target.sendSignalLocked(&linux.SignalInfo{
			Signo: int32(linux.SIGSTOP),
			Code:  linux.SI_USER,
		}, false /* group */)
	}
	// Undocumented Linux feature: If the tracee is already group-stopped (and
	// consequently will not report the SIGSTOP just sent), force it to leave
	// and re-enter the stop so that it will switch to a ptrace-stop.
	if target.stop == (*groupStop)(nil) {
		target.trapStopPending = true
		target.endInternalStopLocked()
		// TODO(jamieliu): Linux blocks ptrace_attach() until the task has
		// entered the ptrace-stop (or exited) via JOBCTL_TRAPPING.
	}
	target.tg.signalHandlers.mu.Unlock()
	return nil
}

// ptraceDetach implements ptrace(PTRACE_DETACH, target, 0, sig). t is the
// caller.
//
// Preconditions: target must be a tracee of t in a frozen ptrace stop.
//
// Postconditions: If ptraceDetach returns nil, target will no longer be in a
// ptrace stop.
func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return syserror.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	target.ptraceCode = int32(sig)
	target.forgetTracerLocked()
	delete(t.ptraceTracees, target)
	return nil
}

// exitPtrace is called in the exit path to detach all of t's tracees.
func (t *Task) exitPtrace() {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	for target := range t.ptraceTracees {
		if target.ptraceOpts.ExitKill {
			target.tg.signalHandlers.mu.Lock()
			target.sendSignalLocked(&linux.SignalInfo{
				Signo: int32(linux.SIGKILL),
			}, false /* group */)
			target.tg.signalHandlers.mu.Unlock()
		}
		// Leave ptraceCode unchanged so that if the task is ptrace-stopped, it
		// observes the ptraceCode it set before it entered the stop. I believe
		// this is consistent with Linux.
		target.forgetTracerLocked()
	}
	// "nil maps cannot be saved"
	t.ptraceTracees = make(map[*Task]struct{})

	if t.ptraceYAMAExceptionAdded {
		delete(t.k.ptraceExceptions, t)
		for tracee, tracer := range t.k.ptraceExceptions {
			if tracer == t {
				delete(t.k.ptraceExceptions, tracee)
			}
		}
	}
}

// forgetTracerLocked detaches t's tracer and ensures that t is no longer
// ptrace-stopped.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) forgetTracerLocked() {
	t.ptraceSeized = false
	t.ptraceOpts = ptraceOptions{}
	t.ptraceSyscallMode = ptraceSyscallNone
	t.ptraceSinglestep = false
	t.ptraceTracer.Store((*Task)(nil))
	if t.exitTracerNotified && !t.exitTracerAcked {
		t.exitTracerAcked = true
		t.exitNotifyLocked(true)
	}
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// Unset t.trapStopPending, which might have been set by PTRACE_INTERRUPT. If
	// it wasn't, it will be reset via t.groupStopPending after the following.
	t.trapStopPending = false
	// If t's thread group is in a group stop and t is eligible to participate,
	// make it do so. This is essentially the reverse of the special case in
	// ptraceAttach, which converts a group stop to a ptrace stop. ("Handling
	// of restart from group-stop is currently buggy, but the "as planned"
	// behavior is to leave tracee stopped and waiting for SIGCONT." -
	// ptrace(2))
	if (t.tg.groupStopComplete || t.tg.groupStopPendingCount != 0) && !t.groupStopPending && t.exitState < TaskExitInitiated {
		t.groupStopPending = true
		// t already participated in the group stop when it unset
		// groupStopPending.
		t.groupStopAcknowledged = true
		t.interrupt()
	}
	if _, ok := t.stop.(*ptraceStop); ok {
		t.endInternalStopLocked()
	}
}

// ptraceSignalLocked is called after signal dequeueing to check if t should
// enter ptrace signal-delivery-stop.
//
// Preconditions:
// * The signal mutex must be locked.
// * The caller must be running on the task goroutine.
// +checklocks:t.tg.signalHandlers.mu
func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
	if linux.Signal(info.Signo) == linux.SIGKILL {
		return false
	}
	if !t.hasTracer() {
		return false
	}
	// The tracer might change this signal into a stop signal, in which case
	// any SIGCONT received after the signal was originally dequeued should
	// cancel it. This is consistent with Linux.
	t.tg.groupStopDequeued = true
	// This is unconditional in ptrace_stop().
	t.trapStopPending = false
	// Can't lock the TaskSet mutex while holding a signal mutex.
	t.tg.signalHandlers.mu.Unlock()
	defer t.tg.signalHandlers.mu.Lock()
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	tracer := t.Tracer()
	if tracer == nil {
		return false
	}
	t.ptraceCode = info.Signo
	t.ptraceSiginfo = info
	t.Debugf("Entering signal-delivery-stop for signal %d", info.Signo)
	if t.beginPtraceStopLocked() {
		tracer.signalStop(t, linux.CLD_TRAPPED, info.Signo)
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
	return true
}

// ptraceSeccomp is called when a seccomp-bpf filter returns action
// SECCOMP_RET_TRACE to check if t should enter PTRACE_EVENT_SECCOMP stop. data
// is the lower 16 bits of the filter's return value.
func (t *Task) ptraceSeccomp(data uint16) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceSeccomp {
		return false
	}
	t.Debugf("Entering PTRACE_EVENT_SECCOMP stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_SECCOMP, uint64(data))
	return true
}

// ptraceSyscallEnter is called immediately before entering a syscall to check
// if t should enter ptrace syscall-enter-stop.
func (t *Task) ptraceSyscallEnter() (taskRunState, bool) {
	if !t.hasTracer() {
		return nil, false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	switch t.ptraceSyscallMode {
	case ptraceSyscallNone:
		return nil, false
	case ptraceSyscallIntercept:
		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSCALL")
		t.ptraceSyscallStopLocked()
		return (*runSyscallAfterSyscallEnterStop)(nil), true
	case ptraceSyscallEmu:
		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSEMU")
		t.ptraceSyscallStopLocked()
		return (*runSyscallAfterSysemuStop)(nil), true
	}
	panic(fmt.Sprintf("Unknown ptraceSyscallMode: %v", t.ptraceSyscallMode))
}

// ptraceSyscallExit is called immediately after leaving a syscall to check if
// t should enter ptrace syscall-exit-stop.
func (t *Task) ptraceSyscallExit() {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if t.ptraceSyscallMode != ptraceSyscallIntercept {
		return
	}
	t.Debugf("Entering syscall-exit-stop")
	t.ptraceSyscallStopLocked()
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceSyscallStopLocked() {
	code := int32(linux.SIGTRAP)
	if t.ptraceOpts.SysGood {
		code |= 0x80
	}
	t.ptraceTrapLocked(code)
}
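
// With PTRACE_O_TRACESYSGOOD set, the stop code above becomes SIGTRAP|0x80
// (0x85), so the stop signal reported to the tracer's waiter identifies
// syscall-enter and syscall-exit stops unambiguously; without it, the tracer
// would have to inspect registers or siginfo to distinguish these stops from
// genuine SIGTRAP delivery, per ptrace(2).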

type ptraceCloneKind int32

const (
	// ptraceCloneKindClone represents a call to Task.Clone where
	// TerminationSignal is not SIGCHLD and Vfork is false.
	ptraceCloneKindClone ptraceCloneKind = iota

	// ptraceCloneKindFork represents a call to Task.Clone where
	// TerminationSignal is SIGCHLD and Vfork is false.
	ptraceCloneKindFork

	// ptraceCloneKindVfork represents a call to Task.Clone where Vfork is
	// true.
	ptraceCloneKindVfork
)

// ptraceClone is called at the end of a clone or fork syscall to check if t
// should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
// stop. child is the new task.
func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	event := false
	if !opts.Untraced {
		switch kind {
		case ptraceCloneKindClone:
			if t.ptraceOpts.TraceClone {
				t.Debugf("Entering PTRACE_EVENT_CLONE stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_CLONE, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindFork:
			if t.ptraceOpts.TraceFork {
				t.Debugf("Entering PTRACE_EVENT_FORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_FORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindVfork:
			if t.ptraceOpts.TraceVfork {
				t.Debugf("Entering PTRACE_EVENT_VFORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		default:
			panic(fmt.Sprintf("Unknown ptraceCloneKind: %v", kind))
		}
	}
	// "If the PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or PTRACE_O_TRACECLONE
	// options are in effect, then children created by, respectively, vfork(2)
	// or clone(2) with the CLONE_VFORK flag, fork(2) or clone(2) with the exit
	// signal set to SIGCHLD, and other kinds of clone(2), are automatically
	// attached to the same tracer which traced their parent. SIGSTOP is
	// delivered to the children, causing them to enter signal-delivery-stop
	// after they exit the system call which created them." - ptrace(2)
	//
	// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
	// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
	// include/linux/ptrace.h:ptrace_init_task().
	if event || opts.InheritTracer {
		tracer := t.Tracer()
		if tracer != nil {
			child.ptraceTracer.Store(tracer)
			tracer.ptraceTracees[child] = struct{}{}
			// "The "seized" behavior ... is inherited by children that are
			// automatically attached using PTRACE_O_TRACEFORK,
			// PTRACE_O_TRACEVFORK, and PTRACE_O_TRACECLONE." - ptrace(2)
			child.ptraceSeized = t.ptraceSeized
			// "Flags are inherited by new tracees created and "auto-attached"
			// via active PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or
			// PTRACE_O_TRACECLONE options." - ptrace(2)
			child.ptraceOpts = t.ptraceOpts
			child.tg.signalHandlers.mu.Lock()
			// "PTRACE_SEIZE: ... Automatically attached children stop with
			// PTRACE_EVENT_STOP and WSTOPSIG(status) returns SIGTRAP instead
			// of having SIGSTOP signal delivered to them." - ptrace(2)
			if child.ptraceSeized {
				child.trapStopPending = true
			} else {
				child.pendingSignals.enqueue(&linux.SignalInfo{
					Signo: int32(linux.SIGSTOP),
				}, nil)
			}
			// The child will self-interrupt() when its task goroutine starts
			// running, so we don't have to.
			child.tg.signalHandlers.mu.Unlock()
		}
	}
	return event
}

// ptraceVforkDone is called after the end of a vfork stop to check if t should
// enter PTRACE_EVENT_VFORK_DONE stop. child is the new task's thread ID in t's
// PID namespace.
func (t *Task) ptraceVforkDone(child ThreadID) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceVforkDone {
		return false
	}
	t.Debugf("Entering PTRACE_EVENT_VFORK_DONE stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK_DONE, uint64(child))
	return true
}

// ptraceExec is called at the end of an execve syscall to check if t should
// enter PTRACE_EVENT_EXEC stop. oldTID is t's thread ID, in its *tracer's* PID
// namespace, prior to the execve. (If t did not have a tracer at the time
// oldTID was read, oldTID may be 0. This is consistent with Linux.)
func (t *Task) ptraceExec(oldTID ThreadID) {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// Recheck with the TaskSet mutex locked. Most ptrace points don't need to
	// do this because detaching resets ptrace options, but PTRACE_EVENT_EXEC
	// is special because both TraceExec and !TraceExec do something if a
	// tracer is attached.
	if !t.hasTracer() {
		return
	}
	if t.ptraceOpts.TraceExec {
		t.Debugf("Entering PTRACE_EVENT_EXEC stop")
		t.ptraceEventLocked(linux.PTRACE_EVENT_EXEC, uint64(oldTID))
		return
	}
	// "If the PTRACE_O_TRACEEXEC option is not in effect for the execing
	// tracee, and if the tracee was PTRACE_ATTACHed rather that [sic]
	// PTRACE_SEIZEd, the kernel delivers an extra SIGTRAP to the tracee after
	// execve(2) returns. This is an ordinary signal (similar to one which can
	// be generated by `kill -TRAP`), not a special kind of ptrace-stop.
	// Employing PTRACE_GETSIGINFO for this signal returns si_code set to 0
	// (SI_USER). This signal may be blocked by signal mask, and thus may be
	// delivered (much) later." - ptrace(2)
	if t.ptraceSeized {
		return
	}
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.sendSignalLocked(&linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  linux.SI_USER,
	}, false /* group */)
}

// ptraceExit is called early in the task exit path to check if t should enter
// PTRACE_EVENT_EXIT stop.
func (t *Task) ptraceExit() {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceExit {
		return
	}
	t.tg.signalHandlers.mu.Lock()
	status := t.exitStatus.Status()
	t.tg.signalHandlers.mu.Unlock()
	t.Debugf("Entering PTRACE_EVENT_EXIT stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status))
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceEventLocked(event int32, msg uint64) {
	t.ptraceEventMsg = msg
	// """
	// PTRACE_EVENT stops are observed by the tracer as waitpid(2) returning
	// with WIFSTOPPED(status), and WSTOPSIG(status) returns SIGTRAP. An
	// additional bit is set in the higher byte of the status word: the value
	// status>>8 will be
	//
	//   (SIGTRAP | PTRACE_EVENT_foo << 8).
	//
	// ...
	//
	// """ - ptrace(2)
	t.ptraceTrapLocked(int32(linux.SIGTRAP) | (event << 8))
}
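
// Worked example of the encoding above: for PTRACE_EVENT_EXIT (value 6) the
// trap code is SIGTRAP | (6 << 8) = 5 | 0x600 = 0x605, so the tracer's wait
// status satisfies WSTOPSIG(status) == SIGTRAP and status>>8 == 0x605. The
// event payload itself (here, the exit status) is retrieved separately with
// PTRACE_GETEVENTMSG, served from ptraceEventMsg in Task.Ptrace below.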

// ptraceKill implements ptrace(PTRACE_KILL, target). t is the caller.
func (t *Task) ptraceKill(target *Task) error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.Tracer() != t {
		return syserror.ESRCH
	}
	target.tg.signalHandlers.mu.Lock()
	defer target.tg.signalHandlers.mu.Unlock()
	// "This operation is deprecated; do not use it! Instead, send a SIGKILL
	// directly using kill(2) or tgkill(2). The problem with PTRACE_KILL is
	// that it requires the tracee to be in signal-delivery-stop, otherwise it
	// may not work (i.e., may complete successfully but won't kill the
	// tracee)." - ptrace(2)
	if target.stop == nil {
		return nil
	}
	if _, ok := target.stop.(*ptraceStop); !ok {
		return nil
	}
	target.ptraceCode = int32(linux.SIGKILL)
	target.endInternalStopLocked()
	return nil
}

// ptraceInterrupt implements ptrace(PTRACE_INTERRUPT, target). t is the
// caller.
func (t *Task) ptraceInterrupt(target *Task) error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.Tracer() != t {
		return syserror.ESRCH
	}
	if !target.ptraceSeized {
		return syserror.EIO
	}
	target.tg.signalHandlers.mu.Lock()
	defer target.tg.signalHandlers.mu.Unlock()
	if target.killedLocked() || target.exitState >= TaskExitInitiated {
		return nil
	}
	target.trapStopPending = true
	if s, ok := target.stop.(*ptraceStop); ok && s.listen {
		target.endInternalStopLocked()
	}
	target.interrupt()
	return nil
}

// Preconditions:
// * The TaskSet mutex must be locked for writing.
// * t must have a tracer.
func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
	const valid = uintptr(linux.PTRACE_O_EXITKILL |
		linux.PTRACE_O_TRACESYSGOOD |
		linux.PTRACE_O_TRACECLONE |
		linux.PTRACE_O_TRACEEXEC |
		linux.PTRACE_O_TRACEEXIT |
		linux.PTRACE_O_TRACEFORK |
		linux.PTRACE_O_TRACESECCOMP |
		linux.PTRACE_O_TRACEVFORK |
		linux.PTRACE_O_TRACEVFORKDONE)
	if opts&^valid != 0 {
		return linuxerr.EINVAL
	}
	t.ptraceOpts = ptraceOptions{
		ExitKill:       opts&linux.PTRACE_O_EXITKILL != 0,
		SysGood:        opts&linux.PTRACE_O_TRACESYSGOOD != 0,
		TraceClone:     opts&linux.PTRACE_O_TRACECLONE != 0,
		TraceExec:      opts&linux.PTRACE_O_TRACEEXEC != 0,
		TraceExit:      opts&linux.PTRACE_O_TRACEEXIT != 0,
		TraceFork:      opts&linux.PTRACE_O_TRACEFORK != 0,
		TraceSeccomp:   opts&linux.PTRACE_O_TRACESECCOMP != 0,
		TraceVfork:     opts&linux.PTRACE_O_TRACEVFORK != 0,
		TraceVforkDone: opts&linux.PTRACE_O_TRACEVFORKDONE != 0,
	}
	return nil
}
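
// An illustrative path into ptraceSetOptionsLocked (names are from this file;
// the particular flag combination is hypothetical user input): a tracer task
// invoking
//
//	tracer.Ptrace(linux.PTRACE_SETOPTIONS, pid, 0,
//		hostarch.Addr(linux.PTRACE_O_TRACESYSGOOD|linux.PTRACE_O_TRACEEXIT))
//
// reaches the PTRACE_SETOPTIONS case in Task.Ptrace below, which takes the
// TaskSet mutex for writing and forwards data here. Any bit outside valid
// fails the whole request with EINVAL, leaving the previous options intact.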

// Ptrace implements the ptrace system call.
func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
	// PTRACE_TRACEME ignores all other arguments.
	if req == linux.PTRACE_TRACEME {
		return t.ptraceTraceme()
	}
	// All other ptrace requests operate on a current or future tracee
	// specified by pid.
	target := t.tg.pidns.TaskWithID(pid)
	if target == nil {
		return syserror.ESRCH
	}

	// PTRACE_ATTACH and PTRACE_SEIZE do not require that target already be a
	// tracee.
	if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE {
		seize := req == linux.PTRACE_SEIZE
		if seize && addr != 0 {
			return syserror.EIO
		}
		return t.ptraceAttach(target, seize, uintptr(data))
	}
	// PTRACE_KILL and PTRACE_INTERRUPT require that the target is a tracee,
	// but do not require that it is ptrace-stopped.
	if req == linux.PTRACE_KILL {
		return t.ptraceKill(target)
	}
	if req == linux.PTRACE_INTERRUPT {
		return t.ptraceInterrupt(target)
	}
	// All other ptrace requests require that the target is a ptrace-stopped
	// tracee, and freeze the ptrace-stop so the tracee can be operated on.
	t.tg.pidns.owner.mu.RLock()
	if target.Tracer() != t {
		t.tg.pidns.owner.mu.RUnlock()
		return syserror.ESRCH
	}
	if !target.ptraceFreeze() {
		t.tg.pidns.owner.mu.RUnlock()
		// "Most ptrace commands (all except PTRACE_ATTACH, PTRACE_SEIZE,
		// PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the
		// tracee to be in a ptrace-stop, otherwise they fail with ESRCH." -
		// ptrace(2)
		return syserror.ESRCH
	}
	t.tg.pidns.owner.mu.RUnlock()
	// Even if the target has a ptrace-stop active, the tracee's task goroutine
	// may not yet have reached Task.doStop; wait for it to do so. This is safe
	// because there's no way for target to initiate a ptrace-stop and then
	// block (by calling Task.block) before entering it.
	//
	// Caveat: If tasks were just restored, the tracee's first call to
	// Task.Activate (in Task.run) occurs before its first call to Task.doStop,
	// which may block if the tracer's address space is active.
	t.UninterruptibleSleepStart(true)
	target.waitGoroutineStoppedOrExited()
	t.UninterruptibleSleepFinish(true)

	// Resuming commands end the ptrace stop, but only if successful.
	// PTRACE_LISTEN ends the ptrace stop if trapNotifyPending is already set
	// on the target.
	switch req {
	case linux.PTRACE_DETACH:
		if err := t.ptraceDetach(target, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_CONT:
		if err := target.ptraceUnstop(ptraceSyscallNone, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSCALL:
		if err := target.ptraceUnstop(ptraceSyscallIntercept, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallNone, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU:
		if err := target.ptraceUnstop(ptraceSyscallEmu, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallEmu, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_LISTEN:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if !target.ptraceSeized {
			return syserror.EIO
		}
		if target.ptraceSiginfo == nil {
			return syserror.EIO
		}
		if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP {
			return syserror.EIO
		}
		target.tg.signalHandlers.mu.Lock()
		defer target.tg.signalHandlers.mu.Unlock()
		if target.trapNotifyPending {
			target.endInternalStopLocked()
		} else {
			target.stop.(*ptraceStop).listen = true
			target.ptraceUnfreezeLocked()
		}
		return nil
	}
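
	// Note on the structure above: the resuming commands and PTRACE_LISTEN
	// consume the frozen stop themselves (ptraceUnstop and
	// endInternalStopLocked end it; on failure they call ptraceUnfreeze
	// explicitly), so only the requests below can rely on a blanket deferred
	// unfreeze.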

	// All other ptrace requests expect us to unfreeze the stop.
	defer target.ptraceUnfreeze()

	switch req {
	case linux.PTRACE_PEEKTEXT, linux.PTRACE_PEEKDATA:
		// "At the system call level, the PTRACE_PEEKTEXT, PTRACE_PEEKDATA, and
		// PTRACE_PEEKUSER requests have a different API: they store the result
		// at the address specified by the data parameter, and the return value
		// is the error flag." - ptrace(2)
		word := t.Arch().Native(0)
		if _, err := word.CopyIn(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr); err != nil {
			return err
		}
		_, err := word.CopyOut(t, data)
		return err

	case linux.PTRACE_POKETEXT, linux.PTRACE_POKEDATA:
		word := t.Arch().Native(uintptr(data))
		_, err := word.CopyOut(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr)
		return err

	case linux.PTRACE_GETREGSET:
		// "Read the tracee's registers. addr specifies, in an
		// architecture-dependent way, the type of registers to be read. ...
		// data points to a struct iovec, which describes the destination
		// buffer's location and length. On return, the kernel modifies iov.len
		// to indicate the actual number of bytes returned." - ptrace(2)
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())

		ar := ars.Head()
		n, err := target.Arch().PtraceGetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()))
		if err != nil {
			return err
		}

		// Update iovecs to represent the range of the written register set.
		end, ok := ar.Start.AddLength(uint64(n))
		if !ok {
			panic(fmt.Sprintf("%#x + %#x overflows. Invalid reg size > %#x", ar.Start, n, ar.Length()))
		}
		ar.End = end
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_SETREGSET:
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		mm := t.MemoryManager()
		t.p.PullFullState(mm.AddressSpace(), t.Arch())

		ar := ars.Head()
		n, err := target.Arch().PtraceSetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   mm,
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()))
		if err != nil {
			return err
		}
		t.p.FullStateChanged()
		ar.End -= hostarch.Addr(n)
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_GETSIGINFO:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		_, err := target.ptraceSiginfo.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGINFO:
		var info linux.SignalInfo
		if _, err := info.CopyIn(t, data); err != nil {
			return err
		}
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		target.ptraceSiginfo = &info
		return nil

	case linux.PTRACE_GETSIGMASK:
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		mask := target.SignalMask()
		_, err := mask.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGMASK:
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		var mask linux.SignalSet
		if _, err := mask.CopyIn(t, data); err != nil {
			return err
		}
		// The target's task goroutine is stopped, so this is safe:
		target.SetSignalMask(mask &^ UnblockableSignals)
		return nil

	case linux.PTRACE_SETOPTIONS:
		t.tg.pidns.owner.mu.Lock()
		defer t.tg.pidns.owner.mu.Unlock()
		return target.ptraceSetOptionsLocked(uintptr(data))

	case linux.PTRACE_GETEVENTMSG:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		_, err := primitive.CopyUint64Out(t, hostarch.Addr(data), target.ptraceEventMsg)
		return err

	// PEEKSIGINFO is unimplemented but seems to have no users anywhere.

	default:
		return t.ptraceArch(target, req, addr, data)
	}
}