gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/kernel/ptrace.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"gvisor.dev/gvisor/pkg/abi/linux"
    21  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    22  	"gvisor.dev/gvisor/pkg/hostarch"
    23  	"gvisor.dev/gvisor/pkg/marshal/primitive"
    24  	"gvisor.dev/gvisor/pkg/sentry/mm"
    25  	"gvisor.dev/gvisor/pkg/usermem"
    26  )
    27  
// ptraceOptions are the subset of options controlling a task's ptrace behavior
// that are set by ptrace(PTRACE_SETOPTIONS).
//
// The zero value represents a task with no ptrace options set; this is the
// state restored by forgetTracerLocked when a tracer detaches.
//
// +stateify savable
type ptraceOptions struct {
	// ExitKill is true if the tracee should be sent SIGKILL when the tracer
	// exits.
	ExitKill bool

	// If SysGood is true, set bit 7 in the signal number for
	// syscall-entry-stop and syscall-exit-stop traps delivered to this task's
	// tracer.
	SysGood bool

	// TraceClone is true if the tracer wants to receive PTRACE_EVENT_CLONE
	// events.
	TraceClone bool

	// TraceExec is true if the tracer wants to receive PTRACE_EVENT_EXEC
	// events.
	TraceExec bool

	// TraceExit is true if the tracer wants to receive PTRACE_EVENT_EXIT
	// events.
	TraceExit bool

	// TraceFork is true if the tracer wants to receive PTRACE_EVENT_FORK
	// events.
	TraceFork bool

	// TraceSeccomp is true if the tracer wants to receive PTRACE_EVENT_SECCOMP
	// events.
	TraceSeccomp bool

	// TraceVfork is true if the tracer wants to receive PTRACE_EVENT_VFORK
	// events.
	TraceVfork bool

	// TraceVforkDone is true if the tracer wants to receive
	// PTRACE_EVENT_VFORK_DONE events.
	TraceVforkDone bool
}
    70  
// ptraceSyscallMode controls the behavior of a ptraced task at syscall entry
// and exit. It is stored in Task.ptraceSyscallMode and reset to
// ptraceSyscallNone when a tracer detaches (see forgetTracerLocked).
type ptraceSyscallMode int

const (
	// ptraceSyscallNone indicates that the task has never ptrace-stopped, or
	// that it was resumed from its last ptrace-stop by PTRACE_CONT or
	// PTRACE_DETACH. The task's syscalls will not be intercepted.
	ptraceSyscallNone ptraceSyscallMode = iota

	// ptraceSyscallIntercept indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSCALL. The next time the task enters or exits a
	// syscall, a ptrace-stop will occur.
	ptraceSyscallIntercept

	// ptraceSyscallEmu indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSEMU or PTRACE_SYSEMU_SINGLESTEP. The next time
	// the task enters a syscall, the syscall will be skipped, and a
	// ptrace-stop will occur.
	ptraceSyscallEmu
)
    92  
    93  // CanTrace checks that t is permitted to access target's state, as defined by
    94  // ptrace(2), subsection "Ptrace access mode checking". If attach is true, it
    95  // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
    96  // mode PTRACE_MODE_READ.
    97  //
    98  // In Linux, ptrace access restrictions may be configured by LSMs. While we do
    99  // not support LSMs, we do add additional restrictions based on the commoncap
   100  // and YAMA LSMs.
   101  //
   102  // TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a
   103  // racing setuid(2) may change traceability). This may pose a risk when a task
   104  // changes from traceable to not traceable. This is only problematic across
   105  // execve, where privileges may increase.
   106  //
   107  // We currently do not implement privileged executables (set-user/group-ID bits
   108  // and file capabilities), so that case is not reachable.
   109  func (t *Task) CanTrace(target *Task, attach bool) bool {
   110  	// "If the calling thread and the target thread are in the same thread
   111  	// group, access is always allowed." - ptrace(2)
   112  	//
   113  	// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
   114  	// should not deny sub-threads", first released in Linux 3.12), the rule
   115  	// only applies if t and target are the same task. But, as that commit
   116  	// message puts it, "[any] security check is pointless when the tasks share
   117  	// the same ->mm."
   118  	if t.tg == target.tg {
   119  		return true
   120  	}
   121  
   122  	if !t.canTraceStandard(target, attach) {
   123  		return false
   124  	}
   125  
   126  	if t.k.YAMAPtraceScope.Load() == linux.YAMA_SCOPE_RELATIONAL {
   127  		t.tg.pidns.owner.mu.RLock()
   128  		defer t.tg.pidns.owner.mu.RUnlock()
   129  		if !t.canTraceYAMALocked(target) {
   130  			return false
   131  		}
   132  	}
   133  	return true
   134  }
   135  
   136  // canTraceLocked is the same as CanTrace, except the caller must already hold
   137  // the TaskSet mutex (for reading or writing).
   138  func (t *Task) canTraceLocked(target *Task, attach bool) bool {
   139  	if t.tg == target.tg {
   140  		return true
   141  	}
   142  
   143  	if !t.canTraceStandard(target, attach) {
   144  		return false
   145  	}
   146  
   147  	if t.k.YAMAPtraceScope.Load() == linux.YAMA_SCOPE_RELATIONAL {
   148  		if !t.canTraceYAMALocked(target) {
   149  			return false
   150  		}
   151  	}
   152  	return true
   153  }
   154  
   155  // canTraceStandard performs standard ptrace access checks as defined by
   156  // kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
   157  // implementation of the security_ptrace_access_check() interface, which is
   158  // always invoked.
   159  func (t *Task) canTraceStandard(target *Task, attach bool) bool {
   160  	// """
   161  	// TODO(gvisor.dev/issue/260): 1. If the access mode specifies
   162  	// PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
   163  	//
   164  	// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
   165  	// caller's real UID and GID for the checks in the next step. (Most APIs
   166  	// that check the caller's UID and GID use the effective IDs. For
   167  	// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
   168  	// instead.)
   169  	//
   170  	// 2. Deny access if neither of the following is true:
   171  	//
   172  	//	- The real, effective, and saved-set user IDs of the target match the
   173  	//		caller's user ID, *and* the real, effective, and saved-set group IDs of
   174  	//		the target match the caller's group ID.
   175  	//
   176  	//	- The caller has the CAP_SYS_PTRACE capability in the user namespace of
   177  	//		the target.
   178  	//
   179  	// 3. Deny access if the target process "dumpable" attribute has a value
   180  	// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
   181  	// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
   182  	// the user namespace of the target process.
   183  	//
   184  	// 4. The commoncap LSM performs the following steps:
   185  	//
   186  	// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
   187  	// caller's effective capability set; otherwise (the access mode specifies
   188  	// PTRACE_MODE_REALCREDS, so) use the caller's permitted capability set.
   189  	//
   190  	// b) Deny access if neither of the following is true:
   191  	//
   192  	//	- The caller and the target process are in the same user namespace, and
   193  	//		the caller's capabilities are a proper superset of the target process's
   194  	//		permitted capabilities.
   195  	//
   196  	//	- The caller has the CAP_SYS_PTRACE capability in the target process's
   197  	//		user namespace.
   198  	//
   199  	// Note that the commoncap LSM does not distinguish between
   200  	// PTRACE_MODE_READ and PTRACE_MODE_ATTACH. (ED: From earlier in this
   201  	// section: "the commoncap LSM ... is always invoked".)
   202  	// """
   203  	callerCreds := t.Credentials()
   204  	targetCreds := target.Credentials()
   205  	if callerCreds.HasCapabilityIn(linux.CAP_SYS_PTRACE, targetCreds.UserNamespace) {
   206  		return true
   207  	}
   208  	if cuid := callerCreds.RealKUID; cuid != targetCreds.RealKUID || cuid != targetCreds.EffectiveKUID || cuid != targetCreds.SavedKUID {
   209  		return false
   210  	}
   211  	if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID {
   212  		return false
   213  	}
   214  	var targetMM *mm.MemoryManager
   215  	target.WithMuLocked(func(t *Task) {
   216  		targetMM = t.MemoryManager()
   217  	})
   218  	if targetMM != nil && targetMM.Dumpability() != mm.UserDumpable {
   219  		return false
   220  	}
   221  	if callerCreds.UserNamespace != targetCreds.UserNamespace {
   222  		return false
   223  	}
   224  	if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 {
   225  		return false
   226  	}
   227  	return true
   228  }
   229  
   230  // canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM
   231  // implementation of the security_ptrace_access_check() interface, with YAMA
   232  // configured to mode 1. This is a common default among various Linux
   233  // distributions.
   234  //
   235  // It only permits the tracer to proceed if one of the following conditions is
   236  // met:
   237  //
   238  // a) The tracer is already attached to the tracee.
   239  //
   240  // b) The target is a descendant of the tracer.
   241  //
   242  // c) The target has explicitly given permission to the tracer through the
   243  // PR_SET_PTRACER prctl.
   244  //
   245  // d) The tracer has CAP_SYS_PTRACE.
   246  //
   247  // See security/yama/yama_lsm.c:yama_ptrace_access_check.
   248  //
   249  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   250  func (t *Task) canTraceYAMALocked(target *Task) bool {
   251  	if tracer := target.Tracer(); tracer != nil {
   252  		if tracer.tg == t.tg {
   253  			return true
   254  		}
   255  	}
   256  	if target.isYAMADescendantOfLocked(t) {
   257  		return true
   258  	}
   259  	if target.hasYAMAExceptionForLocked(t) {
   260  		return true
   261  	}
   262  	if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) {
   263  		return true
   264  	}
   265  	return false
   266  }
   267  
   268  // Determines whether t is considered a descendant of ancestor for the purposes
   269  // of YAMA permissions (specifically, whether t's thread group is descended from
   270  // ancestor's).
   271  //
   272  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   273  func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
   274  	walker := t
   275  	for walker != nil {
   276  		if walker.tg.leader == ancestor.tg.leader {
   277  			return true
   278  		}
   279  		walker = walker.parent
   280  	}
   281  	return false
   282  }
   283  
   284  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   285  func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
   286  	allowed, ok := t.k.ptraceExceptions[t.tg.leader]
   287  	if !ok {
   288  		return false
   289  	}
   290  	return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
   291  }
   292  
   293  // ClearYAMAException removes any YAMA exception with t as the tracee.
   294  func (t *Task) ClearYAMAException() {
   295  	t.tg.pidns.owner.mu.Lock()
   296  	defer t.tg.pidns.owner.mu.Unlock()
   297  	tracee := t.tg.leader
   298  	delete(t.k.ptraceExceptions, tracee)
   299  }
   300  
   301  // SetYAMAException creates a YAMA exception allowing all descendants of tracer
   302  // to trace t. If tracer is nil, then any task is allowed to trace t.
   303  //
   304  // If there was an existing exception, it is overwritten with the new one.
   305  func (t *Task) SetYAMAException(tracer *Task) {
   306  	t.tg.pidns.owner.mu.Lock()
   307  	defer t.tg.pidns.owner.mu.Unlock()
   308  
   309  	tracee := t.tg.leader
   310  	tracee.ptraceYAMAExceptionAdded = true
   311  	if tracer != nil {
   312  		tracer.ptraceYAMAExceptionAdded = true
   313  	}
   314  
   315  	t.k.ptraceExceptions[tracee] = tracer
   316  }
   317  
// Tracer returns t's ptrace Tracer, or nil if t is not being ptrace-traced.
func (t *Task) Tracer() *Task {
	return t.ptraceTracer.Load()
}
   322  
// hasTracer returns true if t has a ptrace tracer attached. Callers that need
// the tracer itself should call Tracer once and check for nil instead.
func (t *Task) hasTracer() bool {
	// This isn't just inlined into callers so that if Task.Tracer() turns out
	// to be too expensive because of e.g. interface conversion, we can switch
	// to having a separate atomic flag more easily.
	return t.Tracer() != nil
}
   330  
// ptraceStop is a TaskStop placed on tasks in a ptrace-stop.
//
// +stateify savable
type ptraceStop struct {
	// If frozen is true, the stopped task's tracer is currently operating on
	// it, so Task.Kill should not remove the stop. frozen is set by
	// Task.ptraceFreeze and cleared by Task.ptraceUnfreezeLocked.
	frozen bool

	// If listen is true, the stopped task's tracer invoked PTRACE_LISTEN, so
	// ptraceFreeze should fail.
	listen bool
}
   343  
// Killable implements TaskStop.Killable. A ptrace-stop is killable unless it
// has been frozen by the tracer (see ptraceFreeze).
func (s *ptraceStop) Killable() bool {
	return !s.frozen
}
   348  
// beginPtraceStopLocked initiates an unfrozen ptrace-stop on t. If t has been
// killed, the stop is skipped, and beginPtraceStopLocked returns false.
//
// beginPtraceStopLocked does not signal t's tracer or wake it if it is
// waiting.
//
// Preconditions:
//   - The TaskSet mutex must be locked.
//   - The caller must be running on the task goroutine.
func (t *Task) beginPtraceStopLocked() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// This is analogous to Linux's kernel/signal.c:ptrace_stop() => ... =>
	// kernel/sched/core.c:__schedule() => signal_pending_state() check, which
	// is what prevents tasks from entering ptrace-stops after being killed.
	// Note that if t was SIGKILLed and beginPtraceStopLocked is being called
	// for PTRACE_EVENT_EXIT, the task will have dequeued the signal before
	// entering the exit path, so t.killedLocked() will no longer return true.
	// This is consistent with Linux: "Bugs: ... A SIGKILL signal may still
	// cause a PTRACE_EVENT_EXIT stop before actual signal death. This may be
	// changed in the future; SIGKILL is meant to always immediately kill tasks
	// even under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
	if t.killedLocked() {
		return false
	}
	// The zero-valued ptraceStop is unfrozen; the tracer must freeze it via
	// ptraceFreeze before operating on t.
	t.beginInternalStopLocked(&ptraceStop{})
	return true
}
   377  
// ptraceTrapLocked initiates a SIGTRAP ptrace-stop on t with the given ptrace
// code, and notifies t's tracer unless t has already been killed.
//
// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceTrapLocked(code int32) {
	// This is unconditional in ptrace_stop().
	t.tg.signalHandlers.mu.Lock()
	t.trapStopPending = false
	t.tg.signalHandlers.mu.Unlock()
	t.ptraceCode = code
	// Record the siginfo describing this trap for the tracer to retrieve.
	t.ptraceSiginfo = &linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  code,
	}
	t.ptraceSiginfo.SetPID(int32(t.tg.pidns.tids[t]))
	t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
	if t.beginPtraceStopLocked() {
		tracer := t.Tracer()
		tracer.signalStop(t, linux.CLD_TRAPPED, int32(linux.SIGTRAP))
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
}
   397  
   398  // ptraceFreeze checks if t is in a ptraceStop. If so, it freezes the
   399  // ptraceStop, temporarily preventing it from being removed by a concurrent
   400  // Task.Kill, and returns true. Otherwise it returns false.
   401  //
   402  // Preconditions:
   403  //   - The TaskSet mutex must be locked.
   404  //   - The caller must be running on the task goroutine of t's tracer.
   405  func (t *Task) ptraceFreeze() bool {
   406  	t.tg.signalHandlers.mu.Lock()
   407  	defer t.tg.signalHandlers.mu.Unlock()
   408  	if t.stop == nil {
   409  		return false
   410  	}
   411  	s, ok := t.stop.(*ptraceStop)
   412  	if !ok {
   413  		return false
   414  	}
   415  	if s.listen {
   416  		return false
   417  	}
   418  	s.frozen = true
   419  	return true
   420  }
   421  
// ptraceUnfreeze ends the effect of a previous successful call to
// ptraceFreeze, allowing the stop to be removed again (e.g. by Task.Kill).
//
// Preconditions: t must be in a frozen ptraceStop.
func (t *Task) ptraceUnfreeze() {
	// t.tg.signalHandlers is stable because t is in a frozen ptrace-stop,
	// preventing its thread group from completing execve.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.ptraceUnfreezeLocked()
}
   433  
// ptraceUnfreezeLocked is the same as ptraceUnfreeze, except the signal mutex
// is already held.
//
// Preconditions:
//   - t must be in a frozen ptraceStop.
//   - t's signal mutex must be locked.
func (t *Task) ptraceUnfreezeLocked() {
	// Do this even if the task has been killed to ensure a panic if t.stop is
	// nil or not a ptraceStop.
	t.stop.(*ptraceStop).frozen = false
	// A kill that arrived while the stop was frozen could not remove it
	// (Killable() returned false); end the stop on its behalf now.
	if t.killedLocked() {
		t.endInternalStopLocked()
	}
}
   445  
// ptraceUnstop implements ptrace request PTRACE_CONT, PTRACE_SYSCALL,
// PTRACE_SINGLESTEP, PTRACE_SYSEMU, or PTRACE_SYSEMU_SINGLESTEP depending on
// mode and singlestep. If sig is neither zero nor a valid signal, ptraceUnstop
// returns EIO, per ptrace(2).
//
// Preconditions: t must be in a frozen ptrace stop.
//
// Postconditions: If ptraceUnstop returns nil, t will no longer be in a ptrace
// stop.
func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return linuxerr.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	// Record sig (possibly 0) for t to observe when it resumes, and arm the
	// requested syscall interception / singlestep behavior.
	t.ptraceCode = int32(sig)
	t.ptraceSyscallMode = mode
	t.ptraceSinglestep = singlestep
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.endInternalStopLocked()
	return nil
}
   468  
// ptraceTraceme implements ptrace(PTRACE_TRACEME): the calling task t requests
// that its parent become its tracer.
func (t *Task) ptraceTraceme() error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	// A task may have at most one tracer.
	if t.hasTracer() {
		return linuxerr.EPERM
	}
	if t.parent == nil {
		// In Linux, only init can not have a parent, and init is assumed never
		// to invoke PTRACE_TRACEME. In the sentry, TGID 1 is an arbitrary user
		// application that may invoke PTRACE_TRACEME; having no parent can
		// also occur if all tasks in the parent thread group have exited, and
		// failed to find a living thread group to reparent to. The former case
		// is treated as if TGID 1 has an exited parent in an invisible
		// ancestor PID namespace that is an owner of the root user namespace
		// (and consequently has CAP_SYS_PTRACE), and the latter case is a
		// special form of the exited parent case below. In either case,
		// returning nil here is correct.
		return nil
	}
	// The parent must be permitted to attach to t.
	if !t.parent.canTraceLocked(t, true) {
		return linuxerr.EPERM
	}
	if t.parent.exitState != TaskExitNone {
		// Fail silently, as if we were successfully attached but then
		// immediately detached. This is consistent with Linux.
		return nil
	}
	t.ptraceTracer.Store(t.parent)
	t.parent.ptraceTracees[t] = struct{}{}
	return nil
}
   500  
// ptraceAttach implements ptrace(PTRACE_ATTACH, target) if seize is false, and
// ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller.
func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
	// A task may not attach to another task in its own thread group.
	if t.tg == target.tg {
		return linuxerr.EPERM
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if !t.canTraceLocked(target, true) {
		return linuxerr.EPERM
	}
	// A task may have at most one tracer.
	if target.hasTracer() {
		return linuxerr.EPERM
	}
	// Attaching to zombies and dead tasks is not permitted; the exit
	// notification logic relies on this. Linux allows attaching to PF_EXITING
	// tasks, though.
	if target.exitState >= TaskExitZombie {
		return linuxerr.EPERM
	}
	// PTRACE_SEIZE sets ptrace options as part of attaching; an invalid
	// options mask fails with EIO, per ptrace(2).
	if seize {
		if err := target.ptraceSetOptionsLocked(opts); err != nil {
			return linuxerr.EIO
		}
	}
	target.ptraceTracer.Store(t)
	t.ptraceTracees[target] = struct{}{}
	target.ptraceSeized = seize
	target.tg.signalHandlers.mu.Lock()
	// "Unlike PTRACE_ATTACH, PTRACE_SEIZE does not stop the process." -
	// ptrace(2)
	if !seize {
		target.sendSignalLocked(&linux.SignalInfo{
			Signo: int32(linux.SIGSTOP),
			Code:  linux.SI_USER,
		}, false /* group */)
	}
	// Undocumented Linux feature: If the tracee is already group-stopped (and
	// consequently will not report the SIGSTOP just sent), force it to leave
	// and re-enter the stop so that it will switch to a ptrace-stop.
	if target.stop == (*groupStop)(nil) {
		target.trapStopPending = true
		target.endInternalStopLocked()
		// TODO(jamieliu): Linux blocks ptrace_attach() until the task has
		// entered the ptrace-stop (or exited) via JOBCTL_TRAPPING.
	}
	target.tg.signalHandlers.mu.Unlock()
	return nil
}
   550  
// ptraceDetach implements ptrace(PTRACE_DETACH, target, 0, sig). t is the
// caller. If sig is neither zero nor a valid signal, EIO is returned, per
// ptrace(2).
//
// Preconditions: target must be a tracee of t in a frozen ptrace stop.
//
// Postconditions: If ptraceDetach returns nil, target will no longer be in a
// ptrace stop.
func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return linuxerr.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	// Record sig (possibly 0) for target to observe when it resumes from the
	// stop, then sever the tracer/tracee relationship in both directions.
	target.ptraceCode = int32(sig)
	target.forgetTracerLocked()
	delete(t.ptraceTracees, target)
	return nil
}
   569  
// exitPtrace is called in the exit path to detach all of t's tracees, sending
// SIGKILL to those with the PTRACE_O_EXITKILL option set, and to remove any
// YAMA exceptions in which t is either the tracee or the allowed tracer.
func (t *Task) exitPtrace() {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	for target := range t.ptraceTracees {
		if target.ptraceOpts.ExitKill {
			target.tg.signalHandlers.mu.Lock()
			target.sendSignalLocked(&linux.SignalInfo{
				Signo: int32(linux.SIGKILL),
			}, false /* group */)
			target.tg.signalHandlers.mu.Unlock()
		}
		// Leave ptraceCode unchanged so that if the task is ptrace-stopped, it
		// observes the ptraceCode it set before it entered the stop. I believe
		// this is consistent with Linux.
		target.forgetTracerLocked()
	}
	clear(t.ptraceTracees) // nil maps cannot be saved

	// ptraceYAMAExceptionAdded is set by SetYAMAException for both endpoints
	// of an exception; skip the (linear) table scan for tasks that were never
	// involved in one.
	if t.ptraceYAMAExceptionAdded {
		delete(t.k.ptraceExceptions, t)
		for tracee, tracer := range t.k.ptraceExceptions {
			if tracer == t {
				delete(t.k.ptraceExceptions, tracee)
			}
		}
	}
}
   598  
// forgetTracerLocked detaches t's tracer and ensures that t is no longer
// ptrace-stopped.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) forgetTracerLocked() {
	// Reset all per-tracer ptrace state to its zero value.
	t.ptraceSeized = false
	t.ptraceOpts = ptraceOptions{}
	t.ptraceSyscallMode = ptraceSyscallNone
	t.ptraceSinglestep = false
	t.ptraceTracer.Store(nil)
	// If t's exit was reported to the now-detached tracer but never
	// acknowledged, acknowledge it on the tracer's behalf so that exit
	// notification can proceed.
	if t.exitTracerNotified && !t.exitTracerAcked {
		t.exitTracerAcked = true
		t.exitNotifyLocked(true)
	}
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// Unset t.trapStopPending, which might have been set by PTRACE_INTERRUPT. If
	// it wasn't, it will be reset via t.groupStopPending after the following.
	t.trapStopPending = false
	// If t's thread group is in a group stop and t is eligible to participate,
	// make it do so. This is essentially the reverse of the special case in
	// ptraceAttach, which converts a group stop to a ptrace stop. ("Handling
	// of restart from group-stop is currently buggy, but the "as planned"
	// behavior is to leave tracee stopped and waiting for SIGCONT." -
	// ptrace(2))
	if (t.tg.groupStopComplete || t.tg.groupStopPendingCount != 0) && !t.groupStopPending && t.exitState < TaskExitInitiated {
		t.groupStopPending = true
		// t already participated in the group stop when it unset
		// groupStopPending.
		t.groupStopAcknowledged = true
		t.interrupt()
	}
	// If t is currently in a ptrace-stop, end it now that there is no tracer
	// to resume it.
	if _, ok := t.stop.(*ptraceStop); ok {
		t.endInternalStopLocked()
	}
}
   635  
// ptraceSignalLocked is called after signal dequeueing to check if t should
// enter ptrace signal-delivery-stop. It returns true if the signal's normal
// delivery is suppressed in favor of ptrace handling.
//
// Preconditions:
//   - The signal mutex must be locked.
//   - The caller must be running on the task goroutine.
//
// +checklocks:t.tg.signalHandlers.mu
func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
	// SIGKILL is never subject to signal-delivery-stop.
	if linux.Signal(info.Signo) == linux.SIGKILL {
		return false
	}
	if !t.hasTracer() {
		return false
	}
	// The tracer might change this signal into a stop signal, in which case
	// any SIGCONT received after the signal was originally dequeued should
	// cancel it. This is consistent with Linux.
	t.tg.groupStopDequeued = true
	// This is unconditional in ptrace_stop().
	t.trapStopPending = false
	// Can't lock the TaskSet mutex while holding a signal mutex.
	t.tg.signalHandlers.mu.Unlock()
	defer t.tg.signalHandlers.mu.Lock()
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// The tracer may have detached while the signal mutex was dropped above,
	// so re-check under the TaskSet mutex.
	tracer := t.Tracer()
	if tracer == nil {
		return false
	}
	t.ptraceCode = info.Signo
	t.ptraceSiginfo = info
	t.Debugf("Entering signal-delivery-stop for signal %d", info.Signo)
	if t.beginPtraceStopLocked() {
		tracer.signalStop(t, linux.CLD_TRAPPED, info.Signo)
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
	return true
}
   675  
// ptraceSeccomp is called when a seccomp-bpf filter returns action
// SECCOMP_RET_TRACE to check if t should enter PTRACE_EVENT_SECCOMP stop. data
// is the lower 16 bits of the filter's return value. It returns true if the
// stop was entered, and false if t has no tracer or its tracer has not
// requested PTRACE_EVENT_SECCOMP events.
func (t *Task) ptraceSeccomp(data uint16) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceSeccomp {
		return false
	}
	t.Debugf("Entering PTRACE_EVENT_SECCOMP stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_SECCOMP, uint64(data))
	return true
}
   692  
   693  // ptraceSyscallEnter is called immediately before entering a syscall to check
   694  // if t should enter ptrace syscall-enter-stop.
   695  func (t *Task) ptraceSyscallEnter() (taskRunState, bool) {
   696  	if !t.hasTracer() {
   697  		return nil, false
   698  	}
   699  	t.tg.pidns.owner.mu.RLock()
   700  	defer t.tg.pidns.owner.mu.RUnlock()
   701  	switch t.ptraceSyscallMode {
   702  	case ptraceSyscallNone:
   703  		return nil, false
   704  	case ptraceSyscallIntercept:
   705  		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSCALL")
   706  		t.ptraceSyscallStopLocked()
   707  		return (*runSyscallAfterSyscallEnterStop)(nil), true
   708  	case ptraceSyscallEmu:
   709  		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSEMU")
   710  		t.ptraceSyscallStopLocked()
   711  		return (*runSyscallAfterSysemuStop)(nil), true
   712  	}
   713  	panic(fmt.Sprintf("Unknown ptraceSyscallMode: %v", t.ptraceSyscallMode))
   714  }
   715  
   716  // ptraceSyscallExit is called immediately after leaving a syscall to check if
   717  // t should enter ptrace syscall-exit-stop.
   718  func (t *Task) ptraceSyscallExit() {
   719  	if !t.hasTracer() {
   720  		return
   721  	}
   722  	t.tg.pidns.owner.mu.RLock()
   723  	defer t.tg.pidns.owner.mu.RUnlock()
   724  	if t.ptraceSyscallMode != ptraceSyscallIntercept {
   725  		return
   726  	}
   727  	t.Debugf("Entering syscall-exit-stop")
   728  	t.ptraceSyscallStopLocked()
   729  }
   730  
   731  // Preconditions: The TaskSet mutex must be locked.
   732  func (t *Task) ptraceSyscallStopLocked() {
   733  	code := int32(linux.SIGTRAP)
   734  	if t.ptraceOpts.SysGood {
   735  		code |= 0x80
   736  	}
   737  	t.ptraceTrapLocked(code)
   738  }
   739  
// ptraceCloneKind classifies a clone for ptrace purposes, determining which
// PTRACE_EVENT_* stop (clone, fork, or vfork) may be reported to the tracer;
// see Task.ptraceClone.
type ptraceCloneKind int32

const (
	// ptraceCloneKindClone represents a call to Task.Clone where
	// TerminationSignal is not SIGCHLD and Vfork is false.
	ptraceCloneKindClone ptraceCloneKind = iota

	// ptraceCloneKindFork represents a call to Task.Clone where
	// TerminationSignal is SIGCHLD and Vfork is false.
	ptraceCloneKindFork

	// ptraceCloneKindVfork represents a call to Task.Clone where Vfork is
	// true.
	ptraceCloneKindVfork
)
   755  
// ptraceClone is called at the end of a clone or fork syscall to check if t
// should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
// stop. child is the new task.
//
// It returns true if t entered a ptrace event stop for the new child.
func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	event := false
	// CLONE_UNTRACED suppresses the event stop entirely.
	if args.Flags&linux.CLONE_UNTRACED == 0 {
		switch kind {
		case ptraceCloneKindClone:
			if t.ptraceOpts.TraceClone {
				t.Debugf("Entering PTRACE_EVENT_CLONE stop")
				// The event message is the child's thread ID in t's PID
				// namespace.
				t.ptraceEventLocked(linux.PTRACE_EVENT_CLONE, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindFork:
			if t.ptraceOpts.TraceFork {
				t.Debugf("Entering PTRACE_EVENT_FORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_FORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindVfork:
			if t.ptraceOpts.TraceVfork {
				t.Debugf("Entering PTRACE_EVENT_VFORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		default:
			panic(fmt.Sprintf("Unknown ptraceCloneKind: %v", kind))
		}
	}
	// "If the PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or PTRACE_O_TRACECLONE
	// options are in effect, then children created by, respectively, vfork(2)
	// or clone(2) with the CLONE_VFORK flag, fork(2) or clone(2) with the exit
	// signal set to SIGCHLD, and other kinds of clone(2), are automatically
	// attached to the same tracer which traced their parent. SIGSTOP is
	// delivered to the children, causing them to enter signal-delivery-stop
	// after they exit the system call which created them." - ptrace(2)
	//
	// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
	// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
	// include/linux/ptrace.h:ptrace_init_task().
	if event || args.Flags&linux.CLONE_PTRACE != 0 {
		tracer := t.Tracer()
		if tracer != nil {
			// Auto-attach the child to t's tracer.
			child.ptraceTracer.Store(tracer)
			tracer.ptraceTracees[child] = struct{}{}
			// "The "seized" behavior ... is inherited by children that are
			// automatically attached using PTRACE_O_TRACEFORK,
			// PTRACE_O_TRACEVFORK, and PTRACE_O_TRACECLONE." - ptrace(2)
			child.ptraceSeized = t.ptraceSeized
			// "Flags are inherited by new tracees created and "auto-attached"
			// via active PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or
			// PTRACE_O_TRACECLONE options." - ptrace(2)
			child.ptraceOpts = t.ptraceOpts
			child.tg.signalHandlers.mu.Lock()
			// "PTRACE_SEIZE: ... Automatically attached children stop with
			// PTRACE_EVENT_STOP and WSTOPSIG(status) returns SIGTRAP instead
			// of having SIGSTOP signal delivered to them." - ptrace(2)
			if child.ptraceSeized {
				child.trapStopPending = true
			} else {
				child.pendingSignals.enqueue(&linux.SignalInfo{
					Signo: int32(linux.SIGSTOP),
				}, nil)
			}
			// The child will self-interrupt() when its task goroutine starts
			// running, so we don't have to.
			child.tg.signalHandlers.mu.Unlock()
		}
	}
	return event
}
   832  
   833  // ptraceVforkDone is called after the end of a vfork stop to check if t should
   834  // enter PTRACE_EVENT_VFORK_DONE stop. child is the new task's thread ID in t's
   835  // PID namespace.
   836  func (t *Task) ptraceVforkDone(child ThreadID) bool {
   837  	if !t.hasTracer() {
   838  		return false
   839  	}
   840  	t.tg.pidns.owner.mu.RLock()
   841  	defer t.tg.pidns.owner.mu.RUnlock()
   842  	if !t.ptraceOpts.TraceVforkDone {
   843  		return false
   844  	}
   845  	t.Debugf("Entering PTRACE_EVENT_VFORK_DONE stop")
   846  	t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK_DONE, uint64(child))
   847  	return true
   848  }
   849  
// ptraceExec is called at the end of an execve syscall to check if t should
// enter PTRACE_EVENT_EXEC stop. oldTID is t's thread ID, in its *tracer's* PID
// namespace, prior to the execve. (If t did not have a tracer at the time
// oldTID was read, oldTID may be 0. This is consistent with Linux.)
func (t *Task) ptraceExec(oldTID ThreadID) {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// Recheck with the TaskSet mutex locked. Most ptrace points don't need to
	// do this because detaching resets ptrace options, but PTRACE_EVENT_EXEC
	// is special because both TraceExec and !TraceExec do something if a
	// tracer is attached.
	if !t.hasTracer() {
		return
	}
	if t.ptraceOpts.TraceExec {
		t.Debugf("Entering PTRACE_EVENT_EXEC stop")
		t.ptraceEventLocked(linux.PTRACE_EVENT_EXEC, uint64(oldTID))
		return
	}
	// "If the PTRACE_O_TRACEEXEC option is not in effect for the execing
	// tracee, and if the tracee was PTRACE_ATTACHed rather that [sic]
	// PTRACE_SEIZEd, the kernel delivers an extra SIGTRAP to the tracee after
	// execve(2) returns. This is an ordinary signal (similar to one which can
	// be generated by `kill -TRAP`, not a special kind of ptrace-stop.
	// Employing PTRACE_GETSIGINFO for this signal returns si_code set to 0
	// (SI_USER). This signal may be blocked by signal mask, and thus may be
	// delivered (much) later." - ptrace(2)
	if t.ptraceSeized {
		return
	}
	// Deliver the legacy post-execve SIGTRAP as an ordinary thread-directed
	// signal (not a ptrace-stop), per the quote above.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.sendSignalLocked(&linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  linux.SI_USER,
	}, false /* group */)
}
   890  
   891  // ptraceExit is called early in the task exit path to check if t should enter
   892  // PTRACE_EVENT_EXIT stop.
   893  func (t *Task) ptraceExit() {
   894  	if !t.hasTracer() {
   895  		return
   896  	}
   897  	t.tg.pidns.owner.mu.RLock()
   898  	defer t.tg.pidns.owner.mu.RUnlock()
   899  	if !t.ptraceOpts.TraceExit {
   900  		return
   901  	}
   902  	t.tg.signalHandlers.mu.Lock()
   903  	status := t.exitStatus
   904  	t.tg.signalHandlers.mu.Unlock()
   905  	t.Debugf("Entering PTRACE_EVENT_EXIT stop")
   906  	t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status))
   907  }
   908  
   909  // Preconditions: The TaskSet mutex must be locked.
   910  func (t *Task) ptraceEventLocked(event int32, msg uint64) {
   911  	t.ptraceEventMsg = msg
   912  	// """
   913  	// PTRACE_EVENT stops are observed by the tracer as waitpid(2) returning
   914  	// with WIFSTOPPED(status), and WSTOPSIG(status) returns SIGTRAP. An
   915  	// additional bit is set in the higher byte of the status word: the value
   916  	// status>>8 will be
   917  	//
   918  	//   (SIGTRAP | PTRACE_EVENT_foo << 8).
   919  	//
   920  	// ...
   921  	//
   922  	// """ - ptrace(2)
   923  	t.ptraceTrapLocked(int32(linux.SIGTRAP) | (event << 8))
   924  }
   925  
   926  // ptraceKill implements ptrace(PTRACE_KILL, target). t is the caller.
   927  func (t *Task) ptraceKill(target *Task) error {
   928  	t.tg.pidns.owner.mu.Lock()
   929  	defer t.tg.pidns.owner.mu.Unlock()
   930  	if target.Tracer() != t {
   931  		return linuxerr.ESRCH
   932  	}
   933  	target.tg.signalHandlers.mu.Lock()
   934  	defer target.tg.signalHandlers.mu.Unlock()
   935  	// "This operation is deprecated; do not use it! Instead, send a SIGKILL
   936  	// directly using kill(2) or tgkill(2). The problem with PTRACE_KILL is
   937  	// that it requires the tracee to be in signal-delivery-stop, otherwise it
   938  	// may not work (i.e., may complete successfully but won't kill the
   939  	// tracee)." - ptrace(2)
   940  	if target.stop == nil {
   941  		return nil
   942  	}
   943  	if _, ok := target.stop.(*ptraceStop); !ok {
   944  		return nil
   945  	}
   946  	target.ptraceCode = int32(linux.SIGKILL)
   947  	target.endInternalStopLocked()
   948  	return nil
   949  }
   950  
// ptraceInterrupt implements ptrace(PTRACE_INTERRUPT, target). t is the
// caller.
func (t *Task) ptraceInterrupt(target *Task) error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.Tracer() != t {
		return linuxerr.ESRCH
	}
	// PTRACE_INTERRUPT is only valid for tracees attached with PTRACE_SEIZE.
	if !target.ptraceSeized {
		return linuxerr.EIO
	}
	target.tg.signalHandlers.mu.Lock()
	defer target.tg.signalHandlers.mu.Unlock()
	// Interrupting a task that is already killed or exiting is a no-op.
	if target.killedLocked() || target.exitState >= TaskExitInitiated {
		return nil
	}
	target.trapStopPending = true
	// If the tracee is in a listening ptrace-stop (see PTRACE_LISTEN), end
	// that stop so the pending trap stop can be entered.
	if s, ok := target.stop.(*ptraceStop); ok && s.listen {
		target.endInternalStopLocked()
	}
	// Kick the target's task goroutine so it notices trapStopPending.
	target.interrupt()
	return nil
}
   972  
   973  // Preconditions:
   974  //   - The TaskSet mutex must be locked for writing.
   975  //   - t must have a tracer.
   976  func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
   977  	const valid = uintptr(linux.PTRACE_O_EXITKILL |
   978  		linux.PTRACE_O_TRACESYSGOOD |
   979  		linux.PTRACE_O_TRACECLONE |
   980  		linux.PTRACE_O_TRACEEXEC |
   981  		linux.PTRACE_O_TRACEEXIT |
   982  		linux.PTRACE_O_TRACEFORK |
   983  		linux.PTRACE_O_TRACESECCOMP |
   984  		linux.PTRACE_O_TRACEVFORK |
   985  		linux.PTRACE_O_TRACEVFORKDONE)
   986  	if opts&^valid != 0 {
   987  		return linuxerr.EINVAL
   988  	}
   989  	t.ptraceOpts = ptraceOptions{
   990  		ExitKill:       opts&linux.PTRACE_O_EXITKILL != 0,
   991  		SysGood:        opts&linux.PTRACE_O_TRACESYSGOOD != 0,
   992  		TraceClone:     opts&linux.PTRACE_O_TRACECLONE != 0,
   993  		TraceExec:      opts&linux.PTRACE_O_TRACEEXEC != 0,
   994  		TraceExit:      opts&linux.PTRACE_O_TRACEEXIT != 0,
   995  		TraceFork:      opts&linux.PTRACE_O_TRACEFORK != 0,
   996  		TraceSeccomp:   opts&linux.PTRACE_O_TRACESECCOMP != 0,
   997  		TraceVfork:     opts&linux.PTRACE_O_TRACEVFORK != 0,
   998  		TraceVforkDone: opts&linux.PTRACE_O_TRACEVFORKDONE != 0,
   999  	}
  1000  	return nil
  1001  }
  1002  
// Ptrace implements the ptrace system call.
func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
	// PTRACE_TRACEME ignores all other arguments.
	if req == linux.PTRACE_TRACEME {
		return t.ptraceTraceme()
	}
	// All other ptrace requests operate on a current or future tracee
	// specified by pid.
	target := t.tg.pidns.TaskWithID(pid)
	if target == nil {
		return linuxerr.ESRCH
	}

	// PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already
	// a tracee.
	if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE {
		seize := req == linux.PTRACE_SEIZE
		// PTRACE_SEIZE requires addr == 0; a nonzero addr fails with EIO.
		if seize && addr != 0 {
			return linuxerr.EIO
		}
		return t.ptraceAttach(target, seize, uintptr(data))
	}
	// PTRACE_KILL and PTRACE_INTERRUPT require that the target is a tracee,
	// but does not require that it is ptrace-stopped.
	if req == linux.PTRACE_KILL {
		return t.ptraceKill(target)
	}
	if req == linux.PTRACE_INTERRUPT {
		return t.ptraceInterrupt(target)
	}
	// All other ptrace requests require that the target is a ptrace-stopped
	// tracee, and freeze the ptrace-stop so the tracee can be operated on.
	t.tg.pidns.owner.mu.RLock()
	if target.Tracer() != t {
		t.tg.pidns.owner.mu.RUnlock()
		return linuxerr.ESRCH
	}
	if !target.ptraceFreeze() {
		t.tg.pidns.owner.mu.RUnlock()
		// "Most ptrace commands (all except PTRACE_ATTACH, PTRACE_SEIZE,
		// PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the
		// tracee to be in a ptrace-stop, otherwise they fail with ESRCH." -
		// ptrace(2)
		return linuxerr.ESRCH
	}
	t.tg.pidns.owner.mu.RUnlock()
	// Even if the target has a ptrace-stop active, the tracee's task goroutine
	// may not yet have reached Task.doStop; wait for it to do so. This is safe
	// because there's no way for target to initiate a ptrace-stop and then
	// block (by calling Task.block) before entering it.
	//
	// Caveat: If tasks were just restored, the tracee's first call to
	// Task.Activate (in Task.run) occurs before its first call to Task.doStop,
	// which may block if the tracer's address space is active.
	t.UninterruptibleSleepStart(true)
	target.waitGoroutineStoppedOrExited()
	t.UninterruptibleSleepFinish(true)

	// Resuming commands end the ptrace stop, but only if successful.
	// PTRACE_LISTEN ends the ptrace stop if trapNotifyPending is already set on the
	// target.
	switch req {
	case linux.PTRACE_DETACH:
		if err := t.ptraceDetach(target, linux.Signal(data)); err != nil {
			// On failure the stop was not ended, so it must be unfrozen here.
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_CONT:
		if err := target.ptraceUnstop(ptraceSyscallNone, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSCALL:
		if err := target.ptraceUnstop(ptraceSyscallIntercept, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallNone, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU:
		if err := target.ptraceUnstop(ptraceSyscallEmu, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallEmu, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_LISTEN:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		// PTRACE_LISTEN is only valid for seized tracees...
		if !target.ptraceSeized {
			return linuxerr.EIO
		}
		if target.ptraceSiginfo == nil {
			return linuxerr.EIO
		}
		// ...that are currently in a PTRACE_EVENT_STOP stop.
		if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP {
			return linuxerr.EIO
		}
		target.tg.signalHandlers.mu.Lock()
		defer target.tg.signalHandlers.mu.Unlock()
		if target.trapNotifyPending {
			target.endInternalStopLocked()
		} else {
			// Mark the stop as a listening stop and let the tracee's
			// goroutine run again within it.
			target.stop.(*ptraceStop).listen = true
			target.ptraceUnfreezeLocked()
		}
		return nil
	}

	// All other ptrace requests expect us to unfreeze the stop.
	defer target.ptraceUnfreeze()

	switch req {
	case linux.PTRACE_PEEKTEXT, linux.PTRACE_PEEKDATA:
		// "At the system call level, the PTRACE_PEEKTEXT, PTRACE_PEEKDATA, and
		// PTRACE_PEEKUSER requests have a different API: they store the result
		// at the address specified by the data parameter, and the return value
		// is the error flag." - ptrace(2)
		word := t.Arch().Native(0)
		if _, err := word.CopyIn(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr); err != nil {
			return err
		}
		_, err := word.CopyOut(t, data)
		return err

	case linux.PTRACE_POKETEXT, linux.PTRACE_POKEDATA:
		word := t.Arch().Native(uintptr(data))
		_, err := word.CopyOut(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr)
		return err

	case linux.PTRACE_GETREGSET:
		// "Read the tracee's registers. addr specifies, in an
		// architecture-dependent way, the type of registers to be read. ...
		// data points to a struct iovec, which describes the destination
		// buffer's location and length. On return, the kernel modifies iov.len
		// to indicate the actual number of bytes returned." - ptrace(2)
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		ar := ars.Head()
		n, err := target.Arch().PtraceGetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()), target.Kernel().FeatureSet())
		if err != nil {
			return err
		}

		// Update iovecs to represent the range of the written register set.
		end, ok := ar.Start.AddLength(uint64(n))
		if !ok {
			panic(fmt.Sprintf("%#x + %#x overflows. Invalid reg size > %#x", ar.Start, n, ar.Length()))
		}
		ar.End = end
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_SETREGSET:
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		ar := ars.Head()
		n, err := target.Arch().PtraceSetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()), target.Kernel().FeatureSet())
		if err != nil {
			return err
		}
		// Registers were modified behind the platform's back; force a full
		// state reload.
		target.p.FullStateChanged()
		// NOTE(review): the iovec is shrunk by n here, unlike GETREGSET which
		// sets End to Start+n — confirm this matches Linux's iov.len update
		// for PTRACE_SETREGSET.
		ar.End -= hostarch.Addr(n)
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_GETSIGINFO:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		// EINVAL if the tracee's current stop has no associated siginfo.
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		_, err := target.ptraceSiginfo.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGINFO:
		var info linux.SignalInfo
		if _, err := info.CopyIn(t, data); err != nil {
			return err
		}
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		target.ptraceSiginfo = &info
		return nil

	case linux.PTRACE_GETSIGMASK:
		// addr carries the signal set size; only the standard size is
		// accepted.
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		mask := target.SignalMask()
		_, err := mask.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGMASK:
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		var mask linux.SignalSet
		if _, err := mask.CopyIn(t, data); err != nil {
			return err
		}
		// The target's task goroutine is stopped, so this is safe:
		target.SetSignalMask(mask &^ UnblockableSignals)
		return nil

	case linux.PTRACE_SETOPTIONS:
		t.tg.pidns.owner.mu.Lock()
		defer t.tg.pidns.owner.mu.Unlock()
		return target.ptraceSetOptionsLocked(uintptr(data))

	case linux.PTRACE_GETEVENTMSG:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		_, err := primitive.CopyUint64Out(t, hostarch.Addr(data), target.ptraceEventMsg)
		return err

	// PEEKSIGINFO is unimplemented but seems to have no users anywhere.

	default:
		// Remaining requests (e.g. register-file accesses) are
		// architecture-specific.
		return t.ptraceArch(target, req, addr, data)
	}
}