github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/ptrace.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/mm"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/usermem"
    26  )
    27  
    28  // ptraceOptions are the subset of options controlling a task's ptrace behavior
    29  // that are set by ptrace(PTRACE_SETOPTIONS).
    30  //
    31  // +stateify savable
type ptraceOptions struct {
	// ExitKill is true if the tracee should be sent SIGKILL when the tracer
	// exits.
	ExitKill bool

	// If SysGood is true, set bit 7 in the signal number for
	// syscall-entry-stop and syscall-exit-stop traps delivered to this task's
	// tracer. (This corresponds to PTRACE_O_TRACESYSGOOD; it lets the tracer
	// distinguish syscall traps from other SIGTRAPs.)
	SysGood bool

	// TraceClone is true if the tracer wants to receive PTRACE_EVENT_CLONE
	// events.
	TraceClone bool

	// TraceExec is true if the tracer wants to receive PTRACE_EVENT_EXEC
	// events.
	TraceExec bool

	// TraceExit is true if the tracer wants to receive PTRACE_EVENT_EXIT
	// events.
	TraceExit bool

	// TraceFork is true if the tracer wants to receive PTRACE_EVENT_FORK
	// events.
	TraceFork bool

	// TraceSeccomp is true if the tracer wants to receive PTRACE_EVENT_SECCOMP
	// events.
	TraceSeccomp bool

	// TraceVfork is true if the tracer wants to receive PTRACE_EVENT_VFORK
	// events.
	TraceVfork bool

	// TraceVforkDone is true if the tracer wants to receive
	// PTRACE_EVENT_VFORK_DONE events.
	TraceVforkDone bool
}
    70  
// ptraceSyscallMode controls the behavior of a ptraced task at syscall entry
// and exit. The zero value (ptraceSyscallNone) means syscalls are not
// intercepted.
type ptraceSyscallMode int

const (
	// ptraceSyscallNone indicates that the task has never ptrace-stopped, or
	// that it was resumed from its last ptrace-stop by PTRACE_CONT or
	// PTRACE_DETACH. The task's syscalls will not be intercepted.
	ptraceSyscallNone ptraceSyscallMode = iota

	// ptraceSyscallIntercept indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSCALL. The next time the task enters or exits a
	// syscall, a ptrace-stop will occur.
	ptraceSyscallIntercept

	// ptraceSyscallEmu indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSEMU or PTRACE_SYSEMU_SINGLESTEP. The next time
	// the task enters a syscall, the syscall will be skipped, and a
	// ptrace-stop will occur.
	ptraceSyscallEmu
)
    92  
    93  // CanTrace checks that t is permitted to access target's state, as defined by
    94  // ptrace(2), subsection "Ptrace access mode checking". If attach is true, it
    95  // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
    96  // mode PTRACE_MODE_READ.
    97  //
    98  // In Linux, ptrace access restrictions may be configured by LSMs. While we do
    99  // not support LSMs, we do add additional restrictions based on the commoncap
   100  // and YAMA LSMs.
   101  //
   102  // TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a
   103  // racing setuid(2) may change traceability). This may pose a risk when a task
   104  // changes from traceable to not traceable. This is only problematic across
   105  // execve, where privileges may increase.
   106  //
   107  // We currently do not implement privileged executables (set-user/group-ID bits
   108  // and file capabilities), so that case is not reachable.
   109  func (t *Task) CanTrace(target *Task, attach bool) bool {
   110  	// "If the calling thread and the target thread are in the same thread
   111  	// group, access is always allowed." - ptrace(2)
   112  	//
   113  	// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
   114  	// should not deny sub-threads", first released in Linux 3.12), the rule
   115  	// only applies if t and target are the same task. But, as that commit
   116  	// message puts it, "[any] security check is pointless when the tasks share
   117  	// the same ->mm."
   118  	if t.tg == target.tg {
   119  		return true
   120  	}
   121  
   122  	if !t.canTraceStandard(target, attach) {
   123  		return false
   124  	}
   125  
   126  	if t.k.YAMAPtraceScope.Load() == linux.YAMA_SCOPE_RELATIONAL {
   127  		t.tg.pidns.owner.mu.RLock()
   128  		defer t.tg.pidns.owner.mu.RUnlock()
   129  		if !t.canTraceYAMALocked(target) {
   130  			return false
   131  		}
   132  	}
   133  	return true
   134  }
   135  
   136  // canTraceLocked is the same as CanTrace, except the caller must already hold
   137  // the TaskSet mutex (for reading or writing).
   138  func (t *Task) canTraceLocked(target *Task, attach bool) bool {
   139  	if t.tg == target.tg {
   140  		return true
   141  	}
   142  
   143  	if !t.canTraceStandard(target, attach) {
   144  		return false
   145  	}
   146  
   147  	if t.k.YAMAPtraceScope.Load() == linux.YAMA_SCOPE_RELATIONAL {
   148  		if !t.canTraceYAMALocked(target) {
   149  			return false
   150  		}
   151  	}
   152  	return true
   153  }
   154  
   155  // canTraceStandard performs standard ptrace access checks as defined by
   156  // kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
   157  // implementation of the security_ptrace_access_check() interface, which is
   158  // always invoked.
   159  func (t *Task) canTraceStandard(target *Task, attach bool) bool {
   160  	// """
   161  	// TODO(gvisor.dev/issue/260): 1. If the access mode specifies
   162  	// PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
   163  	//
   164  	// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
   165  	// caller's real UID and GID for the checks in the next step. (Most APIs
   166  	// that check the caller's UID and GID use the effective IDs. For
   167  	// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
   168  	// instead.)
   169  	//
   170  	// 2. Deny access if neither of the following is true:
   171  	//
   172  	//	- The real, effective, and saved-set user IDs of the target match the
   173  	//		caller's user ID, *and* the real, effective, and saved-set group IDs of
   174  	//		the target match the caller's group ID.
   175  	//
   176  	//	- The caller has the CAP_SYS_PTRACE capability in the user namespace of
   177  	//		the target.
   178  	//
   179  	// 3. Deny access if the target process "dumpable" attribute has a value
   180  	// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
   181  	// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
   182  	// the user namespace of the target process.
   183  	//
   184  	// 4. The commoncap LSM performs the following steps:
   185  	//
   186  	// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
   187  	// caller's effective capability set; otherwise (the access mode specifies
   188  	// PTRACE_MODE_REALCREDS, so) use the caller's permitted capability set.
   189  	//
   190  	// b) Deny access if neither of the following is true:
   191  	//
   192  	//	- The caller and the target process are in the same user namespace, and
   193  	//		the caller's capabilities are a proper superset of the target process's
   194  	//		permitted capabilities.
   195  	//
   196  	//	- The caller has the CAP_SYS_PTRACE capability in the target process's
   197  	//		user namespace.
   198  	//
   199  	// Note that the commoncap LSM does not distinguish between
   200  	// PTRACE_MODE_READ and PTRACE_MODE_ATTACH. (ED: From earlier in this
   201  	// section: "the commoncap LSM ... is always invoked".)
   202  	// """
   203  	callerCreds := t.Credentials()
   204  	targetCreds := target.Credentials()
   205  	if callerCreds.HasCapabilityIn(linux.CAP_SYS_PTRACE, targetCreds.UserNamespace) {
   206  		return true
   207  	}
   208  	if cuid := callerCreds.RealKUID; cuid != targetCreds.RealKUID || cuid != targetCreds.EffectiveKUID || cuid != targetCreds.SavedKUID {
   209  		return false
   210  	}
   211  	if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID {
   212  		return false
   213  	}
   214  	var targetMM *mm.MemoryManager
   215  	target.WithMuLocked(func(t *Task) {
   216  		targetMM = t.MemoryManager()
   217  	})
   218  	if targetMM != nil && targetMM.Dumpability() != mm.UserDumpable {
   219  		return false
   220  	}
   221  	if callerCreds.UserNamespace != targetCreds.UserNamespace {
   222  		return false
   223  	}
   224  	if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 {
   225  		return false
   226  	}
   227  	return true
   228  }
   229  
   230  // canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM
   231  // implementation of the security_ptrace_access_check() interface, with YAMA
   232  // configured to mode 1. This is a common default among various Linux
   233  // distributions.
   234  //
   235  // It only permits the tracer to proceed if one of the following conditions is
   236  // met:
   237  //
   238  // a) The tracer is already attached to the tracee.
   239  //
   240  // b) The target is a descendant of the tracer.
   241  //
   242  // c) The target has explicitly given permission to the tracer through the
   243  // PR_SET_PTRACER prctl.
   244  //
   245  // d) The tracer has CAP_SYS_PTRACE.
   246  //
   247  // See security/yama/yama_lsm.c:yama_ptrace_access_check.
   248  //
   249  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   250  func (t *Task) canTraceYAMALocked(target *Task) bool {
   251  	if tracer := target.Tracer(); tracer != nil {
   252  		if tracer.tg == t.tg {
   253  			return true
   254  		}
   255  	}
   256  	if target.isYAMADescendantOfLocked(t) {
   257  		return true
   258  	}
   259  	if target.hasYAMAExceptionForLocked(t) {
   260  		return true
   261  	}
   262  	if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) {
   263  		return true
   264  	}
   265  	return false
   266  }
   267  
   268  // Determines whether t is considered a descendant of ancestor for the purposes
   269  // of YAMA permissions (specifically, whether t's thread group is descended from
   270  // ancestor's).
   271  //
   272  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   273  func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
   274  	walker := t
   275  	for walker != nil {
   276  		if walker.tg.leader == ancestor.tg.leader {
   277  			return true
   278  		}
   279  		walker = walker.parent
   280  	}
   281  	return false
   282  }
   283  
   284  // Precondition: the TaskSet mutex must be locked (for reading or writing).
   285  func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
   286  	allowed, ok := t.k.ptraceExceptions[t.tg.leader]
   287  	if !ok {
   288  		return false
   289  	}
   290  	return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
   291  }
   292  
   293  // ClearYAMAException removes any YAMA exception with t as the tracee.
   294  func (t *Task) ClearYAMAException() {
   295  	t.tg.pidns.owner.mu.Lock()
   296  	defer t.tg.pidns.owner.mu.Unlock()
   297  	tracee := t.tg.leader
   298  	delete(t.k.ptraceExceptions, tracee)
   299  }
   300  
   301  // SetYAMAException creates a YAMA exception allowing all descendants of tracer
   302  // to trace t. If tracer is nil, then any task is allowed to trace t.
   303  //
   304  // If there was an existing exception, it is overwritten with the new one.
   305  func (t *Task) SetYAMAException(tracer *Task) {
   306  	t.tg.pidns.owner.mu.Lock()
   307  	defer t.tg.pidns.owner.mu.Unlock()
   308  
   309  	tracee := t.tg.leader
   310  	tracee.ptraceYAMAExceptionAdded = true
   311  	if tracer != nil {
   312  		tracer.ptraceYAMAExceptionAdded = true
   313  	}
   314  
   315  	t.k.ptraceExceptions[tracee] = tracer
   316  }
   317  
   318  // Tracer returns t's ptrace Tracer.
   319  func (t *Task) Tracer() *Task {
   320  	return t.ptraceTracer.Load().(*Task)
   321  }
   322  
   323  // hasTracer returns true if t has a ptrace tracer attached.
   324  func (t *Task) hasTracer() bool {
   325  	// This isn't just inlined into callers so that if Task.Tracer() turns out
   326  	// to be too expensive because of e.g. interface conversion, we can switch
   327  	// to having a separate atomic flag more easily.
   328  	return t.Tracer() != nil
   329  }
   330  
// ptraceStop is a TaskStop placed on tasks in a ptrace-stop.
//
// +stateify savable
type ptraceStop struct {
	// If frozen is true, the stopped task's tracer is currently operating on
	// it, so Task.Kill should not remove the stop. (See ptraceFreeze /
	// ptraceUnfreeze.)
	frozen bool

	// If listen is true, the stopped task's tracer invoked PTRACE_LISTEN, so
	// ptraceFreeze should fail.
	listen bool
}
   343  
   344  // Killable implements TaskStop.Killable.
   345  func (s *ptraceStop) Killable() bool {
   346  	return !s.frozen
   347  }
   348  
// beginPtraceStopLocked initiates an unfrozen ptrace-stop on t. If t has been
// killed, the stop is skipped, and beginPtraceStopLocked returns false.
//
// beginPtraceStopLocked does not signal t's tracer or wake it if it is
// waiting.
//
// Preconditions:
//   - The TaskSet mutex must be locked.
//   - The caller must be running on the task goroutine.
func (t *Task) beginPtraceStopLocked() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// This is analogous to Linux's kernel/signal.c:ptrace_stop() => ... =>
	// kernel/sched/core.c:__schedule() => signal_pending_state() check, which
	// is what prevents tasks from entering ptrace-stops after being killed.
	// Note that if t was SIGKILLed and beingPtraceStopLocked is being called
	// for PTRACE_EVENT_EXIT, the task will have dequeued the signal before
	// entering the exit path, so t.killedLocked() will no longer return true.
	// This is consistent with Linux: "Bugs: ... A SIGKILL signal may still
	// cause a PTRACE_EVENT_EXIT stop before actual signal death. This may be
	// changed in the future; SIGKILL is meant to always immediately kill tasks
	// even under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
	if t.killedLocked() {
		return false
	}
	// The stop starts unfrozen (and not listening); the tracer freezes it
	// before operating on t.
	t.beginInternalStopLocked(&ptraceStop{})
	return true
}
   377  
// ptraceTrapLocked initiates a SIGTRAP ptrace-stop with the given ptrace code
// and notifies t's tracer.
//
// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceTrapLocked(code int32) {
	// This is unconditional in ptrace_stop().
	t.tg.signalHandlers.mu.Lock()
	t.trapStopPending = false
	t.tg.signalHandlers.mu.Unlock()
	// Record the code and a synthesized SIGTRAP siginfo for the tracer to
	// retrieve (e.g. via PTRACE_GETSIGINFO).
	t.ptraceCode = code
	t.ptraceSiginfo = &linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  code,
	}
	// PID/UID are translated into t's own PID and user namespaces.
	t.ptraceSiginfo.SetPID(int32(t.tg.pidns.tids[t]))
	t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
	if t.beginPtraceStopLocked() {
		// Wake the tracer: deliver the stop notification and wake any
		// waitpid()-style waiters.
		tracer := t.Tracer()
		tracer.signalStop(t, linux.CLD_TRAPPED, int32(linux.SIGTRAP))
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
}
   397  
   398  // ptraceFreeze checks if t is in a ptraceStop. If so, it freezes the
   399  // ptraceStop, temporarily preventing it from being removed by a concurrent
   400  // Task.Kill, and returns true. Otherwise it returns false.
   401  //
   402  // Preconditions:
   403  //   - The TaskSet mutex must be locked.
   404  //   - The caller must be running on the task goroutine of t's tracer.
   405  func (t *Task) ptraceFreeze() bool {
   406  	t.tg.signalHandlers.mu.Lock()
   407  	defer t.tg.signalHandlers.mu.Unlock()
   408  	if t.stop == nil {
   409  		return false
   410  	}
   411  	s, ok := t.stop.(*ptraceStop)
   412  	if !ok {
   413  		return false
   414  	}
   415  	if s.listen {
   416  		return false
   417  	}
   418  	s.frozen = true
   419  	return true
   420  }
   421  
// ptraceUnfreeze ends the effect of a previous successful call to
// ptraceFreeze.
//
// Preconditions: t must be in a frozen ptraceStop.
func (t *Task) ptraceUnfreeze() {
	// t.tg.signalHandlers is stable because t is in a frozen ptrace-stop,
	// preventing its thread group from completing execve.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.ptraceUnfreezeLocked()
}
   433  
// ptraceUnfreezeLocked is the same as ptraceUnfreeze, except the caller holds
// t's signal mutex.
//
// Preconditions:
//   - t must be in a frozen ptraceStop.
//   - t's signal mutex must be locked.
func (t *Task) ptraceUnfreezeLocked() {
	// Do this even if the task has been killed to ensure a panic if t.stop is
	// nil or not a ptraceStop.
	t.stop.(*ptraceStop).frozen = false
	// A kill that arrived while the stop was frozen could not end it (see
	// ptraceStop.Killable); end the stop on its behalf now.
	if t.killedLocked() {
		t.endInternalStopLocked()
	}
}
   445  
   446  // ptraceUnstop implements ptrace request PTRACE_CONT, PTRACE_SYSCALL,
   447  // PTRACE_SINGLESTEP, PTRACE_SYSEMU, or PTRACE_SYSEMU_SINGLESTEP depending on
   448  // mode and singlestep.
   449  //
   450  // Preconditions: t must be in a frozen ptrace stop.
   451  //
   452  // Postconditions: If ptraceUnstop returns nil, t will no longer be in a ptrace
   453  // stop.
   454  func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error {
   455  	if sig != 0 && !sig.IsValid() {
   456  		return linuxerr.EIO
   457  	}
   458  	t.tg.pidns.owner.mu.Lock()
   459  	defer t.tg.pidns.owner.mu.Unlock()
   460  	t.ptraceCode = int32(sig)
   461  	t.ptraceSyscallMode = mode
   462  	t.ptraceSinglestep = singlestep
   463  	t.tg.signalHandlers.mu.Lock()
   464  	defer t.tg.signalHandlers.mu.Unlock()
   465  	t.endInternalStopLocked()
   466  	return nil
   467  }
   468  
// ptraceTraceme implements ptrace(PTRACE_TRACEME): t requests to be traced by
// its parent.
func (t *Task) ptraceTraceme() error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	// A task may have at most one tracer.
	if t.hasTracer() {
		return linuxerr.EPERM
	}
	if t.parent == nil {
		// In Linux, only init can not have a parent, and init is assumed never
		// to invoke PTRACE_TRACEME. In the sentry, TGID 1 is an arbitrary user
		// application that may invoke PTRACE_TRACEME; having no parent can
		// also occur if all tasks in the parent thread group have exited, and
		// failed to find a living thread group to reparent to. The former case
		// is treated as if TGID 1 has an exited parent in an invisible
		// ancestor PID namespace that is an owner of the root user namespace
		// (and consequently has CAP_SYS_PTRACE), and the latter case is a
		// special form of the exited parent case below. In either case,
		// returning nil here is correct.
		return nil
	}
	// The parent must pass the PTRACE_MODE_ATTACH access check against t.
	if !t.parent.canTraceLocked(t, true) {
		return linuxerr.EPERM
	}
	if t.parent.exitState != TaskExitNone {
		// Fail silently, as if we were successfully attached but then
		// immediately detached. This is consistent with Linux.
		return nil
	}
	// Establish the tracer link in both directions.
	t.ptraceTracer.Store(t.parent)
	t.parent.ptraceTracees[t] = struct{}{}
	return nil
}
   500  
// ptraceAttach implements ptrace(PTRACE_ATTACH, target) if seize is false, and
// ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller.
func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
	// A task may not attach to anything in its own thread group.
	if t.tg == target.tg {
		return linuxerr.EPERM
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	// PTRACE_MODE_ATTACH access check.
	if !t.canTraceLocked(target, true) {
		return linuxerr.EPERM
	}
	// A task may have at most one tracer.
	if target.hasTracer() {
		return linuxerr.EPERM
	}
	// Attaching to zombies and dead tasks is not permitted; the exit
	// notification logic relies on this. Linux allows attaching to PF_EXITING
	// tasks, though.
	if target.exitState >= TaskExitZombie {
		return linuxerr.EPERM
	}
	if seize {
		// PTRACE_SEIZE applies options atomically with the attach; per
		// ptrace(2), invalid options yield EIO.
		if err := target.ptraceSetOptionsLocked(opts); err != nil {
			return linuxerr.EIO
		}
	}
	target.ptraceTracer.Store(t)
	t.ptraceTracees[target] = struct{}{}
	target.ptraceSeized = seize
	target.tg.signalHandlers.mu.Lock()
	// "Unlike PTRACE_ATTACH, PTRACE_SEIZE does not stop the process." -
	// ptrace(2)
	if !seize {
		target.sendSignalLocked(&linux.SignalInfo{
			Signo: int32(linux.SIGSTOP),
			Code:  linux.SI_USER,
		}, false /* group */)
	}
	// Undocumented Linux feature: If the tracee is already group-stopped (and
	// consequently will not report the SIGSTOP just sent), force it to leave
	// and re-enter the stop so that it will switch to a ptrace-stop.
	if target.stop == (*groupStop)(nil) {
		target.trapStopPending = true
		target.endInternalStopLocked()
		// TODO(jamieliu): Linux blocks ptrace_attach() until the task has
		// entered the ptrace-stop (or exited) via JOBCTL_TRAPPING.
	}
	target.tg.signalHandlers.mu.Unlock()
	return nil
}
   550  
   551  // ptraceDetach implements ptrace(PTRACE_DETACH, target, 0, sig). t is the
   552  // caller.
   553  //
   554  // Preconditions: target must be a tracee of t in a frozen ptrace stop.
   555  //
   556  // Postconditions: If ptraceDetach returns nil, target will no longer be in a
   557  // ptrace stop.
   558  func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error {
   559  	if sig != 0 && !sig.IsValid() {
   560  		return linuxerr.EIO
   561  	}
   562  	t.tg.pidns.owner.mu.Lock()
   563  	defer t.tg.pidns.owner.mu.Unlock()
   564  	target.ptraceCode = int32(sig)
   565  	target.forgetTracerLocked()
   566  	delete(t.ptraceTracees, target)
   567  	return nil
   568  }
   569  
// exitPtrace is called in the exit path to detach all of t's tracees.
func (t *Task) exitPtrace() {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	for target := range t.ptraceTracees {
		// Honor PTRACE_O_EXITKILL: kill tracees that requested it.
		if target.ptraceOpts.ExitKill {
			target.tg.signalHandlers.mu.Lock()
			target.sendSignalLocked(&linux.SignalInfo{
				Signo: int32(linux.SIGKILL),
			}, false /* group */)
			target.tg.signalHandlers.mu.Unlock()
		}
		// Leave ptraceCode unchanged so that if the task is ptrace-stopped, it
		// observes the ptraceCode it set before it entered the stop. I believe
		// this is consistent with Linux.
		target.forgetTracerLocked()
	}
	// "nil maps cannot be saved"
	t.ptraceTracees = make(map[*Task]struct{})

	// Clean up any YAMA exceptions in which t participated, as either tracee
	// or allowed tracer. ptraceYAMAExceptionAdded avoids a map scan for tasks
	// that never used PR_SET_PTRACER.
	if t.ptraceYAMAExceptionAdded {
		delete(t.k.ptraceExceptions, t)
		for tracee, tracer := range t.k.ptraceExceptions {
			if tracer == t {
				delete(t.k.ptraceExceptions, tracee)
			}
		}
	}
}
   599  
// forgetTracerLocked detaches t's tracer and ensures that t is no longer
// ptrace-stopped.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) forgetTracerLocked() {
	// Reset all per-tracee ptrace state.
	t.ptraceSeized = false
	t.ptraceOpts = ptraceOptions{}
	t.ptraceSyscallMode = ptraceSyscallNone
	t.ptraceSinglestep = false
	t.ptraceTracer.Store((*Task)(nil))
	// If the tracer was notified of t's exit but never acknowledged it
	// (wait()ed), acknowledge on its behalf so exit can proceed.
	if t.exitTracerNotified && !t.exitTracerAcked {
		t.exitTracerAcked = true
		t.exitNotifyLocked(true)
	}
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// Unset t.trapStopPending, which might have been set by PTRACE_INTERRUPT. If
	// it wasn't, it will be reset via t.groupStopPending after the following.
	t.trapStopPending = false
	// If t's thread group is in a group stop and t is eligible to participate,
	// make it do so. This is essentially the reverse of the special case in
	// ptraceAttach, which converts a group stop to a ptrace stop. ("Handling
	// of restart from group-stop is currently buggy, but the "as planned"
	// behavior is to leave tracee stopped and waiting for SIGCONT." -
	// ptrace(2))
	if (t.tg.groupStopComplete || t.tg.groupStopPendingCount != 0) && !t.groupStopPending && t.exitState < TaskExitInitiated {
		t.groupStopPending = true
		// t already participated in the group stop when it unset
		// groupStopPending.
		t.groupStopAcknowledged = true
		t.interrupt()
	}
	// Finally, release t from any ptrace-stop it is currently in.
	if _, ok := t.stop.(*ptraceStop); ok {
		t.endInternalStopLocked()
	}
}
   636  
// ptraceSignalLocked is called after signal dequeueing to check if t should
// enter ptrace signal-delivery-stop. It returns true if the signal was
// consumed by the stop (i.e. it should not be delivered normally).
//
// Preconditions:
//   - The signal mutex must be locked.
//   - The caller must be running on the task goroutine.
//
// +checklocks:t.tg.signalHandlers.mu
func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
	// SIGKILL is never subject to signal-delivery-stop.
	if linux.Signal(info.Signo) == linux.SIGKILL {
		return false
	}
	if !t.hasTracer() {
		return false
	}
	// The tracer might change this signal into a stop signal, in which case
	// any SIGCONT received after the signal was originally dequeued should
	// cancel it. This is consistent with Linux.
	t.tg.groupStopDequeued = true
	// This is unconditional in ptrace_stop().
	t.trapStopPending = false
	// Can't lock the TaskSet mutex while holding a signal mutex.
	t.tg.signalHandlers.mu.Unlock()
	defer t.tg.signalHandlers.mu.Lock()
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// The tracer may have detached in the window where the signal mutex was
	// dropped; re-check under the TaskSet mutex.
	tracer := t.Tracer()
	if tracer == nil {
		return false
	}
	t.ptraceCode = info.Signo
	t.ptraceSiginfo = info
	t.Debugf("Entering signal-delivery-stop for signal %d", info.Signo)
	if t.beginPtraceStopLocked() {
		tracer.signalStop(t, linux.CLD_TRAPPED, info.Signo)
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
	return true
}
   676  
   677  // ptraceSeccomp is called when a seccomp-bpf filter returns action
   678  // SECCOMP_RET_TRACE to check if t should enter PTRACE_EVENT_SECCOMP stop. data
   679  // is the lower 16 bits of the filter's return value.
   680  func (t *Task) ptraceSeccomp(data uint16) bool {
   681  	if !t.hasTracer() {
   682  		return false
   683  	}
   684  	t.tg.pidns.owner.mu.RLock()
   685  	defer t.tg.pidns.owner.mu.RUnlock()
   686  	if !t.ptraceOpts.TraceSeccomp {
   687  		return false
   688  	}
   689  	t.Debugf("Entering PTRACE_EVENT_SECCOMP stop")
   690  	t.ptraceEventLocked(linux.PTRACE_EVENT_SECCOMP, uint64(data))
   691  	return true
   692  }
   693  
   694  // ptraceSyscallEnter is called immediately before entering a syscall to check
   695  // if t should enter ptrace syscall-enter-stop.
   696  func (t *Task) ptraceSyscallEnter() (taskRunState, bool) {
   697  	if !t.hasTracer() {
   698  		return nil, false
   699  	}
   700  	t.tg.pidns.owner.mu.RLock()
   701  	defer t.tg.pidns.owner.mu.RUnlock()
   702  	switch t.ptraceSyscallMode {
   703  	case ptraceSyscallNone:
   704  		return nil, false
   705  	case ptraceSyscallIntercept:
   706  		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSCALL")
   707  		t.ptraceSyscallStopLocked()
   708  		return (*runSyscallAfterSyscallEnterStop)(nil), true
   709  	case ptraceSyscallEmu:
   710  		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSEMU")
   711  		t.ptraceSyscallStopLocked()
   712  		return (*runSyscallAfterSysemuStop)(nil), true
   713  	}
   714  	panic(fmt.Sprintf("Unknown ptraceSyscallMode: %v", t.ptraceSyscallMode))
   715  }
   716  
   717  // ptraceSyscallExit is called immediately after leaving a syscall to check if
   718  // t should enter ptrace syscall-exit-stop.
   719  func (t *Task) ptraceSyscallExit() {
   720  	if !t.hasTracer() {
   721  		return
   722  	}
   723  	t.tg.pidns.owner.mu.RLock()
   724  	defer t.tg.pidns.owner.mu.RUnlock()
   725  	if t.ptraceSyscallMode != ptraceSyscallIntercept {
   726  		return
   727  	}
   728  	t.Debugf("Entering syscall-exit-stop")
   729  	t.ptraceSyscallStopLocked()
   730  }
   731  
   732  // Preconditions: The TaskSet mutex must be locked.
   733  func (t *Task) ptraceSyscallStopLocked() {
   734  	code := int32(linux.SIGTRAP)
   735  	if t.ptraceOpts.SysGood {
   736  		code |= 0x80
   737  	}
   738  	t.ptraceTrapLocked(code)
   739  }
   740  
// ptraceCloneKind classifies a Task.Clone call for the purpose of choosing
// which PTRACE_EVENT (clone/fork/vfork) the tracer may observe.
type ptraceCloneKind int32

const (
	// ptraceCloneKindClone represents a call to Task.Clone where
	// TerminationSignal is not SIGCHLD and Vfork is false.
	ptraceCloneKindClone ptraceCloneKind = iota

	// ptraceCloneKindFork represents a call to Task.Clone where
	// TerminationSignal is SIGCHLD and Vfork is false.
	ptraceCloneKindFork

	// ptraceCloneKindVfork represents a call to Task.Clone where Vfork is
	// true.
	ptraceCloneKindVfork
)
   756  
// ptraceClone is called at the end of a clone or fork syscall to check if t
// should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
// stop. child is the new task.
//
// It returns true if t entered one of those event stops (i.e. the matching
// PTRACE_O_TRACE* option was set and CLONE_UNTRACED was not passed).
func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	event := false
	// CLONE_UNTRACED suppresses all clone-related ptrace events.
	if args.Flags&linux.CLONE_UNTRACED == 0 {
		switch kind {
		case ptraceCloneKindClone:
			if t.ptraceOpts.TraceClone {
				t.Debugf("Entering PTRACE_EVENT_CLONE stop")
				// The event message is the child's TID in t's PID namespace.
				t.ptraceEventLocked(linux.PTRACE_EVENT_CLONE, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindFork:
			if t.ptraceOpts.TraceFork {
				t.Debugf("Entering PTRACE_EVENT_FORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_FORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindVfork:
			if t.ptraceOpts.TraceVfork {
				t.Debugf("Entering PTRACE_EVENT_VFORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		default:
			panic(fmt.Sprintf("Unknown ptraceCloneKind: %v", kind))
		}
	}
	// "If the PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or PTRACE_O_TRACECLONE
	// options are in effect, then children created by, respectively, vfork(2)
	// or clone(2) with the CLONE_VFORK flag, fork(2) or clone(2) with the exit
	// signal set to SIGCHLD, and other kinds of clone(2), are automatically
	// attached to the same tracer which traced their parent. SIGSTOP is
	// delivered to the children, causing them to enter signal-delivery-stop
	// after they exit the system call which created them." - ptrace(2)
	//
	// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
	// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
	// include/linux/ptrace.h:ptrace_init_task().
	if event || args.Flags&linux.CLONE_PTRACE != 0 {
		tracer := t.Tracer()
		if tracer != nil {
			// Auto-attach the child to t's tracer.
			child.ptraceTracer.Store(tracer)
			tracer.ptraceTracees[child] = struct{}{}
			// "The "seized" behavior ... is inherited by children that are
			// automatically attached using PTRACE_O_TRACEFORK,
			// PTRACE_O_TRACEVFORK, and PTRACE_O_TRACECLONE." - ptrace(2)
			child.ptraceSeized = t.ptraceSeized
			// "Flags are inherited by new tracees created and "auto-attached"
			// via active PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or
			// PTRACE_O_TRACECLONE options." - ptrace(2)
			child.ptraceOpts = t.ptraceOpts
			child.tg.signalHandlers.mu.Lock()
			// "PTRACE_SEIZE: ... Automatically attached children stop with
			// PTRACE_EVENT_STOP and WSTOPSIG(status) returns SIGTRAP instead
			// of having SIGSTOP signal delivered to them." - ptrace(2)
			if child.ptraceSeized {
				child.trapStopPending = true
			} else {
				child.pendingSignals.enqueue(&linux.SignalInfo{
					Signo: int32(linux.SIGSTOP),
				}, nil)
			}
			// The child will self-interrupt() when its task goroutine starts
			// running, so we don't have to.
			child.tg.signalHandlers.mu.Unlock()
		}
	}
	return event
}
   833  
   834  // ptraceVforkDone is called after the end of a vfork stop to check if t should
   835  // enter PTRACE_EVENT_VFORK_DONE stop. child is the new task's thread ID in t's
   836  // PID namespace.
   837  func (t *Task) ptraceVforkDone(child ThreadID) bool {
   838  	if !t.hasTracer() {
   839  		return false
   840  	}
   841  	t.tg.pidns.owner.mu.RLock()
   842  	defer t.tg.pidns.owner.mu.RUnlock()
   843  	if !t.ptraceOpts.TraceVforkDone {
   844  		return false
   845  	}
   846  	t.Debugf("Entering PTRACE_EVENT_VFORK_DONE stop")
   847  	t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK_DONE, uint64(child))
   848  	return true
   849  }
   850  
// ptraceExec is called at the end of an execve syscall to check if t should
// enter PTRACE_EVENT_EXEC stop. oldTID is t's thread ID, in its *tracer's* PID
// namespace, prior to the execve. (If t did not have a tracer at the time
// oldTID was read, oldTID may be 0. This is consistent with Linux.)
func (t *Task) ptraceExec(oldTID ThreadID) {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// Recheck with the TaskSet mutex locked. Most ptrace points don't need to
	// do this because detaching resets ptrace options, but PTRACE_EVENT_EXEC
	// is special because both TraceExec and !TraceExec do something if a
	// tracer is attached.
	if !t.hasTracer() {
		return
	}
	if t.ptraceOpts.TraceExec {
		t.Debugf("Entering PTRACE_EVENT_EXEC stop")
		t.ptraceEventLocked(linux.PTRACE_EVENT_EXEC, uint64(oldTID))
		return
	}
	// "If the PTRACE_O_TRACEEXEC option is not in effect for the execing
	// tracee, and if the tracee was PTRACE_ATTACHed rather that [sic]
	// PTRACE_SEIZEd, the kernel delivers an extra SIGTRAP to the tracee after
	// execve(2) returns. This is an ordinary signal (similar to one which can
	// be generated by `kill -TRAP`, not a special kind of ptrace-stop.
	// Employing PTRACE_GETSIGINFO for this signal returns si_code set to 0
	// (SI_USER). This signal may be blocked by signal mask, and thus may be
	// delivered (much) later." - ptrace(2)
	if t.ptraceSeized {
		return
	}
	// Deliver the legacy post-execve SIGTRAP as an ordinary (SI_USER,
	// non-group) signal.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.sendSignalLocked(&linux.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  linux.SI_USER,
	}, false /* group */)
}
   891  
// ptraceExit is called early in the task exit path to check if t should enter
// PTRACE_EVENT_EXIT stop.
func (t *Task) ptraceExit() {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceExit {
		return
	}
	// exitStatus is protected by the signal handlers' mutex; copy it out
	// before entering the stop so the event message carries the exit status.
	t.tg.signalHandlers.mu.Lock()
	status := t.exitStatus
	t.tg.signalHandlers.mu.Unlock()
	t.Debugf("Entering PTRACE_EVENT_EXIT stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status))
}
   909  
// ptraceEventLocked enters a PTRACE_EVENT_foo stop for the given event,
// recording msg so that the tracer can retrieve it with PTRACE_GETEVENTMSG.
//
// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceEventLocked(event int32, msg uint64) {
	t.ptraceEventMsg = msg
	// """
	// PTRACE_EVENT stops are observed by the tracer as waitpid(2) returning
	// with WIFSTOPPED(status), and WSTOPSIG(status) returns SIGTRAP. An
	// additional bit is set in the higher byte of the status word: the value
	// status>>8 will be
	//
	//   (SIGTRAP | PTRACE_EVENT_foo << 8).
	//
	// ...
	//
	// """ - ptrace(2)
	t.ptraceTrapLocked(int32(linux.SIGTRAP) | (event << 8))
}
   926  
   927  // ptraceKill implements ptrace(PTRACE_KILL, target). t is the caller.
   928  func (t *Task) ptraceKill(target *Task) error {
   929  	t.tg.pidns.owner.mu.Lock()
   930  	defer t.tg.pidns.owner.mu.Unlock()
   931  	if target.Tracer() != t {
   932  		return linuxerr.ESRCH
   933  	}
   934  	target.tg.signalHandlers.mu.Lock()
   935  	defer target.tg.signalHandlers.mu.Unlock()
   936  	// "This operation is deprecated; do not use it! Instead, send a SIGKILL
   937  	// directly using kill(2) or tgkill(2). The problem with PTRACE_KILL is
   938  	// that it requires the tracee to be in signal-delivery-stop, otherwise it
   939  	// may not work (i.e., may complete successfully but won't kill the
   940  	// tracee)." - ptrace(2)
   941  	if target.stop == nil {
   942  		return nil
   943  	}
   944  	if _, ok := target.stop.(*ptraceStop); !ok {
   945  		return nil
   946  	}
   947  	target.ptraceCode = int32(linux.SIGKILL)
   948  	target.endInternalStopLocked()
   949  	return nil
   950  }
   951  
// ptraceInterrupt implements ptrace(PTRACE_INTERRUPT, target). t is the
// caller. The target must have been attached with PTRACE_SEIZE.
func (t *Task) ptraceInterrupt(target *Task) error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.Tracer() != t {
		return linuxerr.ESRCH
	}
	// "PTRACE_INTERRUPT only works on tracees attached by PTRACE_SEIZE." -
	// ptrace(2)
	if !target.ptraceSeized {
		return linuxerr.EIO
	}
	target.tg.signalHandlers.mu.Lock()
	defer target.tg.signalHandlers.mu.Unlock()
	// A killed or exiting task cannot be stopped; succeed without doing
	// anything.
	if target.killedLocked() || target.exitState >= TaskExitInitiated {
		return nil
	}
	target.trapStopPending = true
	// If the target is in a ptrace-stop entered via PTRACE_LISTEN, end that
	// stop so the pending trap-stop can take effect.
	if s, ok := target.stop.(*ptraceStop); ok && s.listen {
		target.endInternalStopLocked()
	}
	target.interrupt()
	return nil
}
   973  
   974  // Preconditions:
   975  //   - The TaskSet mutex must be locked for writing.
   976  //   - t must have a tracer.
   977  func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
   978  	const valid = uintptr(linux.PTRACE_O_EXITKILL |
   979  		linux.PTRACE_O_TRACESYSGOOD |
   980  		linux.PTRACE_O_TRACECLONE |
   981  		linux.PTRACE_O_TRACEEXEC |
   982  		linux.PTRACE_O_TRACEEXIT |
   983  		linux.PTRACE_O_TRACEFORK |
   984  		linux.PTRACE_O_TRACESECCOMP |
   985  		linux.PTRACE_O_TRACEVFORK |
   986  		linux.PTRACE_O_TRACEVFORKDONE)
   987  	if opts&^valid != 0 {
   988  		return linuxerr.EINVAL
   989  	}
   990  	t.ptraceOpts = ptraceOptions{
   991  		ExitKill:       opts&linux.PTRACE_O_EXITKILL != 0,
   992  		SysGood:        opts&linux.PTRACE_O_TRACESYSGOOD != 0,
   993  		TraceClone:     opts&linux.PTRACE_O_TRACECLONE != 0,
   994  		TraceExec:      opts&linux.PTRACE_O_TRACEEXEC != 0,
   995  		TraceExit:      opts&linux.PTRACE_O_TRACEEXIT != 0,
   996  		TraceFork:      opts&linux.PTRACE_O_TRACEFORK != 0,
   997  		TraceSeccomp:   opts&linux.PTRACE_O_TRACESECCOMP != 0,
   998  		TraceVfork:     opts&linux.PTRACE_O_TRACEVFORK != 0,
   999  		TraceVforkDone: opts&linux.PTRACE_O_TRACEVFORKDONE != 0,
  1000  	}
  1001  	return nil
  1002  }
  1003  
// Ptrace implements the ptrace system call. req is the ptrace request; pid
// identifies the target task in t's PID namespace; addr and data are
// request-specific arguments (see ptrace(2)).
func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
	// PTRACE_TRACEME ignores all other arguments.
	if req == linux.PTRACE_TRACEME {
		return t.ptraceTraceme()
	}
	// All other ptrace requests operate on a current or future tracee
	// specified by pid.
	target := t.tg.pidns.TaskWithID(pid)
	if target == nil {
		return linuxerr.ESRCH
	}

	// PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already
	// a tracee.
	if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE {
		seize := req == linux.PTRACE_SEIZE
		// "addr must be 0" for PTRACE_SEIZE. - ptrace(2)
		if seize && addr != 0 {
			return linuxerr.EIO
		}
		return t.ptraceAttach(target, seize, uintptr(data))
	}
	// PTRACE_KILL and PTRACE_INTERRUPT require that the target is a tracee,
	// but does not require that it is ptrace-stopped.
	if req == linux.PTRACE_KILL {
		return t.ptraceKill(target)
	}
	if req == linux.PTRACE_INTERRUPT {
		return t.ptraceInterrupt(target)
	}
	// All other ptrace requests require that the target is a ptrace-stopped
	// tracee, and freeze the ptrace-stop so the tracee can be operated on.
	t.tg.pidns.owner.mu.RLock()
	if target.Tracer() != t {
		t.tg.pidns.owner.mu.RUnlock()
		return linuxerr.ESRCH
	}
	if !target.ptraceFreeze() {
		t.tg.pidns.owner.mu.RUnlock()
		// "Most ptrace commands (all except PTRACE_ATTACH, PTRACE_SEIZE,
		// PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the
		// tracee to be in a ptrace-stop, otherwise they fail with ESRCH." -
		// ptrace(2)
		return linuxerr.ESRCH
	}
	t.tg.pidns.owner.mu.RUnlock()
	// Even if the target has a ptrace-stop active, the tracee's task goroutine
	// may not yet have reached Task.doStop; wait for it to do so. This is safe
	// because there's no way for target to initiate a ptrace-stop and then
	// block (by calling Task.block) before entering it.
	//
	// Caveat: If tasks were just restored, the tracee's first call to
	// Task.Activate (in Task.run) occurs before its first call to Task.doStop,
	// which may block if the tracer's address space is active.
	t.UninterruptibleSleepStart(true)
	target.waitGoroutineStoppedOrExited()
	t.UninterruptibleSleepFinish(true)

	// Resuming commands end the ptrace stop, but only if successful.
	// PTRACE_LISTEN ends the ptrace stop if trapNotifyPending is already set on the
	// target.
	switch req {
	case linux.PTRACE_DETACH:
		if err := t.ptraceDetach(target, linux.Signal(data)); err != nil {
			// On failure the target remains stopped; thaw it so it can be
			// operated on again.
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_CONT:
		if err := target.ptraceUnstop(ptraceSyscallNone, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSCALL:
		if err := target.ptraceUnstop(ptraceSyscallIntercept, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallNone, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU:
		if err := target.ptraceUnstop(ptraceSyscallEmu, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_SYSEMU_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallEmu, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil

	case linux.PTRACE_LISTEN:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		// "PTRACE_LISTEN ... works only on tracees attached by PTRACE_SEIZE
		// ... [and] currently in a group-stop" (i.e. a PTRACE_EVENT_STOP
		// whose siginfo code carries the event). - ptrace(2)
		if !target.ptraceSeized {
			return linuxerr.EIO
		}
		if target.ptraceSiginfo == nil {
			return linuxerr.EIO
		}
		if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP {
			return linuxerr.EIO
		}
		target.tg.signalHandlers.mu.Lock()
		defer target.tg.signalHandlers.mu.Unlock()
		if target.trapNotifyPending {
			// A notification is already pending; end the stop now.
			target.endInternalStopLocked()
		} else {
			// Remain stopped, but allow stop-ending notifications through.
			target.stop.(*ptraceStop).listen = true
			target.ptraceUnfreezeLocked()
		}
		return nil
	}

	// All other ptrace requests expect us to unfreeze the stop.
	defer target.ptraceUnfreeze()

	switch req {
	case linux.PTRACE_PEEKTEXT, linux.PTRACE_PEEKDATA:
		// "At the system call level, the PTRACE_PEEKTEXT, PTRACE_PEEKDATA, and
		// PTRACE_PEEKUSER requests have a different API: they store the result
		// at the address specified by the data parameter, and the return value
		// is the error flag." - ptrace(2)
		word := t.Arch().Native(0)
		if _, err := word.CopyIn(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr); err != nil {
			return err
		}
		_, err := word.CopyOut(t, data)
		return err

	case linux.PTRACE_POKETEXT, linux.PTRACE_POKEDATA:
		// Write the word in data to addr in the target's address space,
		// ignoring page permissions as Linux does for ptrace pokes.
		word := t.Arch().Native(uintptr(data))
		_, err := word.CopyOut(target.CopyContext(t, usermem.IOOpts{IgnorePermissions: true}), addr)
		return err

	case linux.PTRACE_GETREGSET:
		// "Read the tracee's registers. addr specifies, in an
		// architecture-dependent way, the type of registers to be read. ...
		// data points to a struct iovec, which describes the destination
		// buffer's location and length. On return, the kernel modifies iov.len
		// to indicate the actual number of bytes returned." - ptrace(2)
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		ar := ars.Head()
		n, err := target.Arch().PtraceGetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()), target.Kernel().FeatureSet())
		if err != nil {
			return err
		}

		// Update iovecs to represent the range of the written register set.
		end, ok := ar.Start.AddLength(uint64(n))
		if !ok {
			panic(fmt.Sprintf("%#x + %#x overflows. Invalid reg size > %#x", ar.Start, n, ar.Length()))
		}
		ar.End = end
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_SETREGSET:
		// Like PTRACE_GETREGSET, but reads the register set from the iovec
		// described by data and writes it into the target.
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}

		ar := ars.Head()
		n, err := target.Arch().PtraceSetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()), target.Kernel().FeatureSet())
		if err != nil {
			return err
		}
		// The target's register state was mutated externally; notify the
		// platform so it reloads the full state.
		target.p.FullStateChanged()
		ar.End -= hostarch.Addr(n)
		return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))

	case linux.PTRACE_GETSIGINFO:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		_, err := target.ptraceSiginfo.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGINFO:
		var info linux.SignalInfo
		if _, err := info.CopyIn(t, data); err != nil {
			return err
		}
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return linuxerr.EINVAL
		}
		target.ptraceSiginfo = &info
		return nil

	case linux.PTRACE_GETSIGMASK:
		// "addr contains the size of the buffer pointed to by data (which
		// must be sizeof(sigset_t))." - ptrace(2)
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		mask := target.SignalMask()
		_, err := mask.CopyOut(t, data)
		return err

	case linux.PTRACE_SETSIGMASK:
		if addr != linux.SignalSetSize {
			return linuxerr.EINVAL
		}
		var mask linux.SignalSet
		if _, err := mask.CopyIn(t, data); err != nil {
			return err
		}
		// The target's task goroutine is stopped, so this is safe:
		target.SetSignalMask(mask &^ UnblockableSignals)
		return nil

	case linux.PTRACE_SETOPTIONS:
		t.tg.pidns.owner.mu.Lock()
		defer t.tg.pidns.owner.mu.Unlock()
		return target.ptraceSetOptionsLocked(uintptr(data))

	case linux.PTRACE_GETEVENTMSG:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		_, err := primitive.CopyUint64Out(t, hostarch.Addr(data), target.ptraceEventMsg)
		return err

	// PEEKSIGINFO is unimplemented but seems to have no users anywhere.

	default:
		// Architecture-specific requests (e.g. GETREGS/SETREGS).
		return t.ptraceArch(target, req, addr, data)
	}
}