github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/task_exit.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  // This file implements the task exit cycle:
    18  //
    19  // - Tasks are asynchronously requested to exit with Task.Kill.
    20  //
    21  // - When able, the task goroutine enters the exit path starting from state
    22  // runExit.
    23  //
    24  // - Other tasks observe completed exits with Task.Wait (which implements the
    25  // wait*() family of syscalls).
    26  
    27  import (
    28  	"errors"
    29  	"fmt"
    30  	"strconv"
    31  	"strings"
    32  
    33  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    34  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    35  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    36  	"github.com/SagerNet/gvisor/pkg/syserror"
    37  	"github.com/SagerNet/gvisor/pkg/waiter"
    38  )
    39  
    40  // An ExitStatus is a value communicated from an exiting task or thread group
    41  // to the party that reaps it.
    42  //
    43  // +stateify savable
    44  type ExitStatus struct {
    45  	// Code is the numeric value passed to the call to exit or exit_group that
    46  	// caused the exit. If the exit was not caused by such a call, Code is 0.
    47  	Code int
    48  
    49  	// Signo is the signal that caused the exit. If the exit was not caused by
    50  	// a signal, Signo is 0.
    51  	Signo int
    52  }
    53  
    54  func (es ExitStatus) String() string {
    55  	var b strings.Builder
    56  	if code := es.Code; code != 0 {
    57  		if b.Len() != 0 {
    58  			b.WriteByte(' ')
    59  		}
    60  		_, _ = fmt.Fprintf(&b, "Code=%d", code)
    61  	}
    62  	if signal := es.Signo; signal != 0 {
    63  		if b.Len() != 0 {
    64  			b.WriteByte(' ')
    65  		}
    66  		_, _ = fmt.Fprintf(&b, "Signal=%d", signal)
    67  	}
    68  	return b.String()
    69  }
    70  
    71  // Signaled returns true if the ExitStatus indicates that the exiting task or
    72  // thread group was killed by a signal.
    73  func (es ExitStatus) Signaled() bool {
    74  	return es.Signo != 0
    75  }
    76  
    77  // Status returns the numeric representation of the ExitStatus returned by e.g.
    78  // the wait4() system call.
    79  func (es ExitStatus) Status() uint32 {
    80  	return ((uint32(es.Code) & 0xff) << 8) | (uint32(es.Signo) & 0xff)
    81  }
    82  
    83  // ShellExitCode returns the numeric exit code that Bash would return for an
    84  // exit status of es.
    85  func (es ExitStatus) ShellExitCode() int {
    86  	if es.Signaled() {
    87  		return 128 + es.Signo
    88  	}
    89  	return es.Code
    90  }
    91  
    92  // TaskExitState represents a step in the task exit path.
    93  //
    94  // "Exiting" and "exited" are often ambiguous; prefer to name specific states.
    95  type TaskExitState int
    96  
    97  const (
    98  	// TaskExitNone indicates that the task has not begun exiting.
    99  	TaskExitNone TaskExitState = iota
   100  
   101  	// TaskExitInitiated indicates that the task goroutine has entered the exit
   102  	// path, and the task is no longer eligible to participate in group stops
   103  	// or group signal handling. TaskExitInitiated is analogous to Linux's
   104  	// PF_EXITING.
   105  	TaskExitInitiated
   106  
   107  	// TaskExitZombie indicates that the task has released its resources, and
   108  	// the task no longer prevents a sibling thread from completing execve.
   109  	TaskExitZombie
   110  
   111  	// TaskExitDead indicates that the task's thread IDs have been released,
   112  	// and the task no longer prevents its thread group leader from being
   113  	// reaped. ("Reaping" refers to the transitioning of a task from
   114  	// TaskExitZombie to TaskExitDead.)
   115  	TaskExitDead
   116  )
   117  
   118  // String implements fmt.Stringer.
   119  func (t TaskExitState) String() string {
   120  	switch t {
   121  	case TaskExitNone:
   122  		return "TaskExitNone"
   123  	case TaskExitInitiated:
   124  		return "TaskExitInitiated"
   125  	case TaskExitZombie:
   126  		return "TaskExitZombie"
   127  	case TaskExitDead:
   128  		return "TaskExitDead"
   129  	default:
   130  		return strconv.Itoa(int(t))
   131  	}
   132  }
   133  
   134  // killLocked marks t as killed by enqueueing a SIGKILL, without causing the
   135  // thread-group-affecting side effects SIGKILL usually has.
   136  //
   137  // Preconditions: The signal mutex must be locked.
   138  func (t *Task) killLocked() {
   139  	// Clear killable stops.
   140  	if t.stop != nil && t.stop.Killable() {
   141  		t.endInternalStopLocked()
   142  	}
   143  	t.pendingSignals.enqueue(&linux.SignalInfo{
   144  		Signo: int32(linux.SIGKILL),
   145  		// Linux just sets SIGKILL in the pending signal bitmask without
   146  		// enqueueing an actual siginfo, such that
   147  		// kernel/signal.c:collect_signal() initializes si_code to SI_USER.
   148  		Code: linux.SI_USER,
   149  	}, nil)
   150  	t.interrupt()
   151  }
   152  
   153  // killed returns true if t has a SIGKILL pending. killed is analogous to
   154  // Linux's fatal_signal_pending().
   155  //
   156  // Preconditions: The caller must be running on the task goroutine.
   157  func (t *Task) killed() bool {
   158  	t.tg.signalHandlers.mu.Lock()
   159  	defer t.tg.signalHandlers.mu.Unlock()
   160  	return t.killedLocked()
   161  }
   162  
   163  func (t *Task) killedLocked() bool {
   164  	return t.pendingSignals.pendingSet&linux.SignalSetOf(linux.SIGKILL) != 0
   165  }
   166  
   167  // PrepareExit indicates an exit with status es.
   168  //
   169  // Preconditions: The caller must be running on the task goroutine.
   170  func (t *Task) PrepareExit(es ExitStatus) {
   171  	t.tg.signalHandlers.mu.Lock()
   172  	defer t.tg.signalHandlers.mu.Unlock()
   173  	t.exitStatus = es
   174  }
   175  
   176  // PrepareGroupExit indicates a group exit with status es to t's thread group.
   177  //
   178  // PrepareGroupExit is analogous to Linux's do_group_exit(), except that it
   179  // does not tail-call do_exit(), except that it *does* set Task.exitStatus.
   180  // (Linux does not do so until within do_exit(), since it reuses exit_code for
   181  // ptrace.)
   182  //
   183  // Preconditions: The caller must be running on the task goroutine.
   184  func (t *Task) PrepareGroupExit(es ExitStatus) {
   185  	t.tg.signalHandlers.mu.Lock()
   186  	defer t.tg.signalHandlers.mu.Unlock()
   187  	if t.tg.exiting || t.tg.execing != nil {
   188  		// Note that if t.tg.exiting is false but t.tg.execing is not nil, i.e.
   189  		// this "group exit" is being executed by the killed sibling of an
   190  		// execing task, then Task.Execve never set t.tg.exitStatus, so it's
   191  		// still the zero value. This is consistent with Linux, both in intent
   192  		// ("all other threads ... report death as if they exited via _exit(2)
   193  		// with exit code 0" - ptrace(2), "execve under ptrace") and in
   194  		// implementation (compare fs/exec.c:de_thread() =>
   195  		// kernel/signal.c:zap_other_threads() and
   196  		// kernel/exit.c:do_group_exit() =>
   197  		// include/linux/sched.h:signal_group_exit()).
   198  		t.exitStatus = t.tg.exitStatus
   199  		return
   200  	}
   201  	t.tg.exiting = true
   202  	t.tg.exitStatus = es
   203  	t.exitStatus = es
   204  	for sibling := t.tg.tasks.Front(); sibling != nil; sibling = sibling.Next() {
   205  		if sibling != t {
   206  			sibling.killLocked()
   207  		}
   208  	}
   209  }
   210  
   211  // Kill requests that all tasks in ts exit as if group exiting with status es.
   212  // Kill does not wait for tasks to exit.
   213  //
   214  // Kill has no analogue in Linux; it's provided for save/restore only.
   215  func (ts *TaskSet) Kill(es ExitStatus) {
   216  	ts.mu.Lock()
   217  	defer ts.mu.Unlock()
   218  	ts.Root.exiting = true
   219  	for t := range ts.Root.tids {
   220  		t.tg.signalHandlers.mu.Lock()
   221  		if !t.tg.exiting {
   222  			t.tg.exiting = true
   223  			t.tg.exitStatus = es
   224  		}
   225  		t.killLocked()
   226  		t.tg.signalHandlers.mu.Unlock()
   227  	}
   228  }
   229  
   230  // advanceExitStateLocked checks that t's current exit state is oldExit, then
   231  // sets it to newExit. If t's current exit state is not oldExit,
   232  // advanceExitStateLocked panics.
   233  //
   234  // Preconditions: The TaskSet mutex must be locked.
   235  func (t *Task) advanceExitStateLocked(oldExit, newExit TaskExitState) {
   236  	if t.exitState != oldExit {
   237  		panic(fmt.Sprintf("Transitioning from exit state %v to %v: unexpected preceding state %v", oldExit, newExit, t.exitState))
   238  	}
   239  	t.Debugf("Transitioning from exit state %v to %v", oldExit, newExit)
   240  	t.exitState = newExit
   241  }
   242  
   243  // runExit is the entry point into the task exit path.
   244  //
   245  // +stateify savable
   246  type runExit struct{}
   247  
   248  func (*runExit) execute(t *Task) taskRunState {
   249  	t.ptraceExit()
   250  	return (*runExitMain)(nil)
   251  }
   252  
// runExitMain is the run state that runExit.execute hands off to. It performs
// the bulk of the task's teardown.
//
// +stateify savable
type runExitMain struct{}

// execute moves t into TaskExitInitiated (via exitThreadGroup), then releases
// what t holds: its memory image, filesystem context, FD table, cgroup
// membership, and mount and IPC namespace references. If t is the last task
// of its thread group to get here, the thread group is released as well.
// Finally it detaches t's tracees, reparents t's children, and continues with
// runExitNotify.
func (*runExitMain) execute(t *Task) taskRunState {
	t.traceExitEvent()
	lastExiter := t.exitThreadGroup()

	t.ResetKcov()

	// If the task has a cleartid, and the thread group wasn't killed by a
	// signal, handle that before releasing the MM.
	if t.cleartid != 0 {
		t.tg.signalHandlers.mu.Lock()
		signaled := t.tg.exiting && t.tg.exitStatus.Signaled()
		t.tg.signalHandlers.mu.Unlock()
		if !signaled {
			// Write a zero ThreadID to the cleartid address and, if that
			// succeeds, issue a futex wake on the same address.
			zero := ThreadID(0)
			if _, err := zero.CopyOut(t, t.cleartid); err == nil {
				t.Futex().Wake(t, t.cleartid, false, ^uint32(0), 1)
			}
			// If the CopyOut fails, there's nothing we can do.
		}
	}

	// Handle the robust futex list.
	t.exitRobustList()

	// Deactivate the address space and update max RSS before releasing the
	// task's MM.
	t.Deactivate()
	t.tg.pidns.owner.mu.Lock()
	t.updateRSSLocked()
	t.tg.pidns.owner.mu.Unlock()
	t.mu.Lock()
	t.image.release()
	t.mu.Unlock()

	// Releasing the MM unblocks a blocked CLONE_VFORK parent.
	t.unstopVforkParent()

	t.fsContext.DecRef(t)
	t.fdTable.DecRef(t)

	// Detach task from all cgroups. This must happen before potentially the
	// last ref to the cgroupfs mount is dropped below.
	t.LeaveCgroups()

	// Drop the mount namespace (if any) and IPC namespace references under
	// t.mu.
	t.mu.Lock()
	if t.mountNamespaceVFS2 != nil {
		t.mountNamespaceVFS2.DecRef(t)
		t.mountNamespaceVFS2 = nil
	}
	t.ipcns.DecRef(t)
	t.mu.Unlock()

	// If this is the last task to exit from the thread group, release the
	// thread group's resources.
	if lastExiter {
		t.tg.Release(t)
	}

	// Detach tracees.
	t.exitPtrace()

	// Reparent the task's children.
	t.exitChildren()

	// Don't tail-call runExitNotify, as exitChildren may have initiated a stop
	// to wait for a PID namespace to die.
	return (*runExitNotify)(nil)
}
   324  
// exitThreadGroup transitions t to TaskExitInitiated, indicating to t's thread
// group that it is no longer eligible to participate in group activities. It
// returns true if t is the last task in its thread group to call
// exitThreadGroup.
//
// exitThreadGroup locks t.tg.pidns.owner.mu and t's signal mutex itself. If t
// had a group stop pending, t participates in it here, and the thread group
// leader's parent is notified when participateGroupStopLocked asks for that.
func (t *Task) exitThreadGroup() bool {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	t.tg.signalHandlers.mu.Lock()
	// Can't defer unlock: see below.

	t.advanceExitStateLocked(TaskExitNone, TaskExitInitiated)
	t.tg.activeTasks--
	// last is true when no active tasks remain in the thread group.
	last := t.tg.activeTasks == 0

	// Ensure that someone will handle the signals we can't.
	t.setSignalMaskLocked(^linux.SignalSet(0))

	// Check if this task's exit interacts with an initiated group stop.
	if !t.groupStopPending {
		t.tg.signalHandlers.mu.Unlock()
		return last
	}
	t.groupStopPending = false
	// Capture the group stop signal while the signal mutex is still held.
	sig := t.tg.groupStopSignal
	notifyParent := t.participateGroupStopLocked()
	// signalStop must be called with t's signal mutex unlocked.
	t.tg.signalHandlers.mu.Unlock()
	if notifyParent && t.tg.leader.parent != nil {
		t.tg.leader.parent.signalStop(t, linux.CLD_STOPPED, int32(sig))
		t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
	}
	return last
}
   358  
   359  func (t *Task) exitChildren() {
   360  	t.tg.pidns.owner.mu.Lock()
   361  	defer t.tg.pidns.owner.mu.Unlock()
   362  	newParent := t.findReparentTargetLocked()
   363  	if newParent == nil {
   364  		// "If the init process of a PID namespace terminates, the kernel
   365  		// terminates all of the processes in the namespace via a SIGKILL
   366  		// signal." - pid_namespaces(7)
   367  		t.Debugf("Init process terminating, killing namespace")
   368  		t.tg.pidns.exiting = true
   369  		for other := range t.tg.pidns.tgids {
   370  			if other == t.tg {
   371  				continue
   372  			}
   373  			other.signalHandlers.mu.Lock()
   374  			other.leader.sendSignalLocked(&linux.SignalInfo{
   375  				Signo: int32(linux.SIGKILL),
   376  			}, true /* group */)
   377  			other.signalHandlers.mu.Unlock()
   378  		}
   379  		// TODO(b/37722272): The init process waits for all processes in the
   380  		// namespace to exit before completing its own exit
   381  		// (kernel/pid_namespace.c:zap_pid_ns_processes()). Stop until all
   382  		// other tasks in the namespace are dead, except possibly for this
   383  		// thread group's leader (which can't be reaped until this task exits).
   384  	}
   385  	// This is correct even if newParent is nil (it ensures that children don't
   386  	// wait for a parent to reap them.)
   387  	for c := range t.children {
   388  		if sig := c.ParentDeathSignal(); sig != 0 {
   389  			siginfo := &linux.SignalInfo{
   390  				Signo: int32(sig),
   391  				Code:  linux.SI_USER,
   392  			}
   393  			siginfo.SetPID(int32(c.tg.pidns.tids[t]))
   394  			siginfo.SetUID(int32(t.Credentials().RealKUID.In(c.UserNamespace()).OrOverflow()))
   395  			c.tg.signalHandlers.mu.Lock()
   396  			c.sendSignalLocked(siginfo, true /* group */)
   397  			c.tg.signalHandlers.mu.Unlock()
   398  		}
   399  		c.reparentLocked(newParent)
   400  		if newParent != nil {
   401  			newParent.children[c] = struct{}{}
   402  		}
   403  	}
   404  }
   405  
   406  // findReparentTargetLocked returns the task to which t's children should be
   407  // reparented. If no such task exists, findNewParentLocked returns nil.
   408  //
   409  // Preconditions: The TaskSet mutex must be locked.
   410  func (t *Task) findReparentTargetLocked() *Task {
   411  	// Reparent to any sibling in the same thread group that hasn't begun
   412  	// exiting.
   413  	if t2 := t.tg.anyNonExitingTaskLocked(); t2 != nil {
   414  		return t2
   415  	}
   416  	// "A child process that is orphaned within the namespace will be
   417  	// reparented to [the init process for the namespace] ..." -
   418  	// pid_namespaces(7)
   419  	if init := t.tg.pidns.tasks[InitTID]; init != nil {
   420  		return init.tg.anyNonExitingTaskLocked()
   421  	}
   422  	return nil
   423  }
   424  
   425  func (tg *ThreadGroup) anyNonExitingTaskLocked() *Task {
   426  	for t := tg.tasks.Front(); t != nil; t = t.Next() {
   427  		if t.exitState == TaskExitNone {
   428  			return t
   429  		}
   430  	}
   431  	return nil
   432  }
   433  
   434  // reparentLocked changes t's parent. The new parent may be nil.
   435  //
   436  // Preconditions: The TaskSet mutex must be locked for writing.
   437  func (t *Task) reparentLocked(parent *Task) {
   438  	oldParent := t.parent
   439  	t.parent = parent
   440  	if oldParent != nil {
   441  		delete(oldParent.children, t)
   442  	}
   443  	if parent != nil {
   444  		parent.children[t] = struct{}{}
   445  	}
   446  	// If a thread group leader's parent changes, reset the thread group's
   447  	// termination signal to SIGCHLD and re-check exit notification. (Compare
   448  	// kernel/exit.c:reparent_leader().)
   449  	if t != t.tg.leader {
   450  		return
   451  	}
   452  	if oldParent == nil && parent == nil {
   453  		return
   454  	}
   455  	if oldParent != nil && parent != nil && oldParent.tg == parent.tg {
   456  		return
   457  	}
   458  	t.tg.terminationSignal = linux.SIGCHLD
   459  	if t.exitParentNotified && !t.exitParentAcked {
   460  		t.exitParentNotified = false
   461  		t.exitNotifyLocked(false)
   462  	}
   463  }
   464  
   465  // When a task exits, other tasks in the system, notably the task's parent and
   466  // ptracer, may want to be notified. The exit notification system ensures that
   467  // interested tasks receive signals and/or are woken from blocking calls to
   468  // wait*() syscalls; these notifications must be resolved before exiting tasks
   469  // can be reaped and disappear from the system.
   470  //
   471  // Each task may have a parent task and/or a tracer task. If both a parent and
   472  // a tracer exist, they may be the same task, different tasks in the same
   473  // thread group, or tasks in different thread groups. (In the last case, Linux
   474  // refers to the task as being ptrace-reparented due to an implementation
   475  // detail; we avoid this terminology to avoid confusion.)
   476  //
   477  // A thread group is *empty* if all non-leader tasks in the thread group are
   478  // dead, and the leader is either a zombie or dead. The exit of a thread group
   479  // leader is never waitable - by either the parent or tracer - until the thread
   480  // group is empty.
   481  //
   482  // There are a few ways for an exit notification to be resolved:
   483  //
   484  // - The exit notification may be acknowledged by a call to Task.Wait with
   485  // WaitOptions.ConsumeEvent set (e.g. due to a wait4() syscall).
   486  //
   487  // - If the notified party is the parent, and the parent thread group is not
   488  // also the tracer thread group, and the notification signal is SIGCHLD, the
   489  // parent may explicitly ignore the notification (see quote in exitNotify).
   490  // Note that it's possible for the notified party to ignore the signal in other
   491  // cases, but the notification is only resolved under the above conditions.
   492  // (Actually, there is one exception; see the last paragraph of the "leader,
   493  // has tracer, tracer thread group is parent thread group" case below.)
   494  //
   495  // - If the notified party is the parent, and the parent does not exist, the
   496  // notification is resolved as if ignored. (This is only possible in the
   497  // sentry. In Linux, the only task / thread group without a parent is global
   498  // init, and killing global init causes a kernel panic.)
   499  //
   500  // - If the notified party is a tracer, the tracer may detach the traced task.
   501  // (Zombie tasks cannot be ptrace-attached, so the reverse is not possible.)
   502  //
   503  // In addition, if the notified party is the parent, the parent may exit and
   504  // cause the notifying task to be reparented to another thread group. This does
   505  // not resolve the notification; instead, the notification must be resent to
   506  // the new parent.
   507  //
// The series of notifications generated for a given task's exit depends on
// whether it is a thread group leader; whether the task is ptraced; and, if
// so, whether the tracer thread group is the same as the parent thread group.
   511  //
   512  // - Non-leader, no tracer: No notification is generated; the task is reaped
   513  // immediately.
   514  //
   515  // - Non-leader, has tracer: SIGCHLD is sent to the tracer. When the tracer
   516  // notification is resolved (by waiting or detaching), the task is reaped. (For
   517  // non-leaders, whether the tracer and parent thread groups are the same is
   518  // irrelevant.)
   519  //
   520  // - Leader, no tracer: The task remains a zombie, with no notification sent,
   521  // until all other tasks in the thread group are dead. (In Linux terms, this
   522  // condition is indicated by include/linux/sched.h:thread_group_empty(); tasks
   523  // are removed from their thread_group list in kernel/exit.c:release_task() =>
   524  // __exit_signal() => __unhash_process().) Then the thread group's termination
   525  // signal is sent to the parent. When the parent notification is resolved (by
   526  // waiting or ignoring), the task is reaped.
   527  //
   528  // - Leader, has tracer, tracer thread group is not parent thread group:
   529  // SIGCHLD is sent to the tracer. When the tracer notification is resolved (by
   530  // waiting or detaching), and all other tasks in the thread group are dead, the
   531  // thread group's termination signal is sent to the parent. (Note that the
   532  // tracer cannot resolve the exit notification by waiting until the thread
   533  // group is empty.) When the parent notification is resolved, the task is
   534  // reaped.
   535  //
   536  // - Leader, has tracer, tracer thread group is parent thread group:
   537  //
   538  // If all other tasks in the thread group are dead, the thread group's
   539  // termination signal is sent to the parent. At this point, the notification
   540  // can only be resolved by waiting. If the parent detaches from the task as a
   541  // tracer, the notification is not resolved, but the notification can now be
   542  // resolved by waiting or ignoring. When the parent notification is resolved,
   543  // the task is reaped.
   544  //
   545  // If at least one task in the thread group is not dead, SIGCHLD is sent to the
   546  // parent. At this point, the notification cannot be resolved at all; once the
   547  // thread group becomes empty, it can be resolved only by waiting. If the
   548  // parent detaches from the task as a tracer before all remaining tasks die,
   549  // then exit notification proceeds as in the case where the leader never had a
   550  // tracer. If the parent detaches from the task as a tracer after all remaining
   551  // tasks die, the notification is not resolved, but the notification can now be
   552  // resolved by waiting or ignoring. When the parent notification is resolved,
   553  // the task is reaped.
   554  //
   555  // In both of the above cases, when the parent detaches from the task as a
   556  // tracer while the thread group is empty, whether or not the parent resolves
   557  // the notification by ignoring it is based on the parent's SIGCHLD signal
   558  // action, whether or not the thread group's termination signal is SIGCHLD
   559  // (Linux: kernel/ptrace.c:__ptrace_detach() => ignoring_children()).
   560  //
   561  // There is one final wrinkle: A leader can become a non-leader due to a
   562  // sibling execve. In this case, the execing thread detaches the leader's
   563  // tracer (if one exists) and reaps the leader immediately. In Linux, this is
   564  // in fs/exec.c:de_thread(); in the sentry, this is in Task.promoteLocked().
   565  
   566  // +stateify savable
   567  type runExitNotify struct{}
   568  
   569  func (*runExitNotify) execute(t *Task) taskRunState {
   570  	t.tg.pidns.owner.mu.Lock()
   571  	defer t.tg.pidns.owner.mu.Unlock()
   572  	t.advanceExitStateLocked(TaskExitInitiated, TaskExitZombie)
   573  	t.tg.liveTasks--
   574  	// Check if this completes a sibling's execve.
   575  	if t.tg.execing != nil && t.tg.liveTasks == 1 {
   576  		// execing blocks the addition of new tasks to the thread group, so
   577  		// the sole living task must be the execing one.
   578  		e := t.tg.execing
   579  		e.tg.signalHandlers.mu.Lock()
   580  		if _, ok := e.stop.(*execStop); ok {
   581  			e.endInternalStopLocked()
   582  		}
   583  		e.tg.signalHandlers.mu.Unlock()
   584  	}
   585  	t.exitNotifyLocked(false)
   586  	// The task goroutine will now exit.
   587  	return nil
   588  }
   589  
// exitNotifyLocked is called after changes to t's state that affect exit
// notification. It does nothing unless t is in TaskExitZombie. Otherwise it
// proceeds in three steps: notify the tracer (once), notify the parent once
// the tracer notification is resolved, and reap t (TaskExitZombie ->
// TaskExitDead) once both notifications are resolved.
//
// If fromPtraceDetach is true, the caller is ptraceDetach or exitPtrace;
// thanks to Linux's haphazard implementation of this functionality, such cases
// determine whether parent notifications are ignored based on the parent's
// handling of SIGCHLD, regardless of what the exited task's thread group's
// termination signal is.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
	if t.exitState != TaskExitZombie {
		return
	}
	// Step 1: tracer notification, performed at most once per task.
	if !t.exitTracerNotified {
		t.exitTracerNotified = true
		tracer := t.Tracer()
		if tracer == nil {
			// No tracer: the tracer notification is resolved trivially.
			t.exitTracerAcked = true
		} else if t != t.tg.leader || t.parent == nil || tracer.tg != t.parent.tg {
			// Don't set exitParentNotified if t is non-leader, even if the
			// tracer is in the parent thread group, so that if the parent
			// detaches the following call to exitNotifyLocked passes through
			// the !exitParentNotified case below and causes t to be reaped
			// immediately.
			//
			// Tracer notification doesn't care about
			// SIG_IGN/SA_NOCLDWAIT.
			tracer.tg.signalHandlers.mu.Lock()
			tracer.sendSignalLocked(t.exitNotificationSignal(linux.SIGCHLD, tracer), true /* group */)
			tracer.tg.signalHandlers.mu.Unlock()
			// Wake EventTraceeStop waiters as well since this task will never
			// ptrace-stop again.
			tracer.tg.eventQueue.Notify(EventExit | EventTraceeStop)
		} else {
			// t is a leader and the tracer is in the parent thread group.
			t.exitParentNotified = true
			sig := linux.SIGCHLD
			if t.tg.tasksCount == 1 {
				sig = t.tg.terminationSignal
			}
			// This notification doesn't care about SIG_IGN/SA_NOCLDWAIT either
			// (in Linux, the check in do_notify_parent() is gated by
			// !tsk->ptrace.)
			t.parent.tg.signalHandlers.mu.Lock()
			t.parent.sendSignalLocked(t.exitNotificationSignal(sig, t.parent), true /* group */)
			t.parent.tg.signalHandlers.mu.Unlock()
			// See below for rationale for this event mask.
			t.parent.tg.eventQueue.Notify(EventExit | EventChildGroupStop | EventGroupContinue)
		}
	}
	// Step 2: parent notification, once the tracer notification is resolved.
	if t.exitTracerAcked && !t.exitParentNotified {
		if t != t.tg.leader {
			// Non-leaders generate no parent notification.
			t.exitParentNotified = true
			t.exitParentAcked = true
		} else if t.tg.tasksCount == 1 {
			// t is the leader and the only task left in the thread group.
			t.exitParentNotified = true
			if t.parent == nil {
				t.exitParentAcked = true
			} else {
				// "POSIX.1-2001 specifies that if the disposition of SIGCHLD is
				// set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see
				// sigaction(2)), then children that terminate do not become
				// zombies and a call to wait() or waitpid() will block until all
				// children have terminated, and then fail with errno set to
				// ECHILD. (The original POSIX standard left the behavior of
				// setting SIGCHLD to SIG_IGN unspecified. Note that even though
				// the default disposition of SIGCHLD is "ignore", explicitly
				// setting the disposition to SIG_IGN results in different
				// treatment of zombie process children.) Linux 2.6 conforms to
				// this specification." - wait(2)
				//
				// Some undocumented Linux-specific details:
				//
				// - All of the above is ignored if the termination signal isn't
				// SIGCHLD.
				//
				// - SA_NOCLDWAIT causes the leader to be immediately reaped, but
				// does not suppress the SIGCHLD.
				signalParent := t.tg.terminationSignal.IsValid()
				t.parent.tg.signalHandlers.mu.Lock()
				if t.tg.terminationSignal == linux.SIGCHLD || fromPtraceDetach {
					if act, ok := t.parent.tg.signalHandlers.actions[linux.SIGCHLD]; ok {
						if act.Handler == linux.SIG_IGN {
							t.exitParentAcked = true
							signalParent = false
						} else if act.Flags&linux.SA_NOCLDWAIT != 0 {
							t.exitParentAcked = true
						}
					}
				}
				if signalParent {
					t.parent.tg.leader.sendSignalLocked(t.exitNotificationSignal(t.tg.terminationSignal, t.parent), true /* group */)
				}
				t.parent.tg.signalHandlers.mu.Unlock()
				// If a task in the parent was waiting for a child group stop
				// or continue, it needs to be notified of the exit, because
				// there may be no remaining eligible tasks (so that wait
				// should return ECHILD).
				t.parent.tg.eventQueue.Notify(EventExit | EventChildGroupStop | EventGroupContinue)
			}
		}
	}
	// Step 3: both notifications are resolved, so reap t.
	if t.exitTracerAcked && t.exitParentAcked {
		t.advanceExitStateLocked(TaskExitZombie, TaskExitDead)
		// Remove t's IDs from its PID namespace and all ancestor namespaces.
		for ns := t.tg.pidns; ns != nil; ns = ns.parent {
			tid := ns.tids[t]
			delete(ns.tasks, tid)
			delete(ns.tids, t)
			if t == t.tg.leader {
				delete(ns.tgids, t.tg)
			}
		}
		// Fold t's CPU and I/O usage into the thread group's totals.
		t.tg.exitedCPUStats.Accumulate(t.CPUStats())
		t.tg.ioUsage.Accumulate(t.ioUsage)
		t.tg.signalHandlers.mu.Lock()
		t.tg.tasks.Remove(t)
		t.tg.tasksCount--
		tc := t.tg.tasksCount
		t.tg.signalHandlers.mu.Unlock()
		if tc == 1 && t != t.tg.leader {
			// Our fromPtraceDetach doesn't matter here (in Linux terms, this
			// is via a call to release_task()).
			t.tg.leader.exitNotifyLocked(false)
		} else if tc == 0 {
			t.tg.processGroup.decRefWithParent(t.tg.parentPG())
		}
		if t.parent != nil {
			delete(t.parent.children, t)
			// Do not clear t.parent. It may still be needed after the task has exited
			// (for example, to perform ptrace access checks on /proc/[pid] files).
		}
	}
}
   724  
   725  // Preconditions: The TaskSet mutex must be locked.
   726  func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.SignalInfo {
   727  	info := &linux.SignalInfo{
   728  		Signo: int32(sig),
   729  	}
   730  	info.SetPID(int32(receiver.tg.pidns.tids[t]))
   731  	info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
   732  	if t.exitStatus.Signaled() {
   733  		info.Code = linux.CLD_KILLED
   734  		info.SetStatus(int32(t.exitStatus.Signo))
   735  	} else {
   736  		info.Code = linux.CLD_EXITED
   737  		info.SetStatus(int32(t.exitStatus.Code))
   738  	}
   739  	// TODO(b/72102453): Set utime, stime.
   740  	return info
   741  }
   742  
   743  // ExitStatus returns t's exit status, which is only guaranteed to be
   744  // meaningful if t.ExitState() != TaskExitNone.
   745  func (t *Task) ExitStatus() ExitStatus {
   746  	t.tg.pidns.owner.mu.RLock()
   747  	defer t.tg.pidns.owner.mu.RUnlock()
   748  	t.tg.signalHandlers.mu.Lock()
   749  	defer t.tg.signalHandlers.mu.Unlock()
   750  	return t.exitStatus
   751  }
   752  
   753  // ExitStatus returns the exit status that would be returned by a consuming
   754  // wait*() on tg.
   755  func (tg *ThreadGroup) ExitStatus() ExitStatus {
   756  	tg.pidns.owner.mu.RLock()
   757  	defer tg.pidns.owner.mu.RUnlock()
   758  	tg.signalHandlers.mu.Lock()
   759  	defer tg.signalHandlers.mu.Unlock()
   760  	if tg.exiting {
   761  		return tg.exitStatus
   762  	}
   763  	return tg.leader.exitStatus
   764  }
   765  
   766  // TerminationSignal returns the thread group's termination signal.
   767  func (tg *ThreadGroup) TerminationSignal() linux.Signal {
   768  	tg.pidns.owner.mu.RLock()
   769  	defer tg.pidns.owner.mu.RUnlock()
   770  	return tg.terminationSignal
   771  }
   772  
// Task events that can be waited for.
//
// Each event is a distinct bit (1 << iota), so several events can be combined
// into a single waiter.EventMask, as WaitOptions.Events does. The order of the
// declarations below determines the bit assigned to each event.
const (
	// EventExit represents an exit notification generated for a child thread
	// group leader or a tracee under the conditions specified in the comment
	// above runExitNotify.
	EventExit waiter.EventMask = 1 << iota

	// EventChildGroupStop occurs when a child thread group completes a group
	// stop (i.e. all tasks in the child thread group have entered a stopped
	// state as a result of a group stop).
	EventChildGroupStop

	// EventTraceeStop occurs when a task that is ptraced by a task in the
	// notified thread group enters a ptrace stop (see ptrace(2)).
	EventTraceeStop

	// EventGroupContinue occurs when a child thread group, or a thread group
	// whose leader is ptraced by a task in the notified thread group, that had
	// initiated or completed a group stop leaves the group stop, due to the
	// child thread group or any task in the child thread group being sent
	// SIGCONT.
	EventGroupContinue
)
   796  
// WaitOptions controls the behavior of Task.Wait.
//
// The filtering fields (SpecificTID, SpecificPGID, NonCloneTasks, CloneTasks)
// are applied per candidate task by WaitOptions.matchesTask.
type WaitOptions struct {
	// If SpecificTID is non-zero, only events from the task with thread ID
	// SpecificTID are eligible to be waited for. SpecificTID is resolved in
	// the PID namespace of the waiter (the method receiver of Task.Wait). If
	// no such task exists, or that task would not otherwise be eligible to be
	// waited for by the waiting task, then there are no waitable tasks and
	// Wait will return ECHILD.
	SpecificTID ThreadID

	// If SpecificPGID is non-zero, only events from ThreadGroups with a
	// matching ProcessGroupID are eligible to be waited for. (Same
	// constraints as SpecificTID apply.)
	SpecificPGID ProcessGroupID

	// Terminology note: Per waitpid(2), "a clone child is one which delivers
	// no signal, or a signal other than SIGCHLD to its parent upon
	// termination." In Linux, termination signal is technically a per-task
	// property rather than a per-thread-group property. However, clone()
	// forces no termination signal for tasks created with CLONE_THREAD, and
	// execve() resets the termination signal to SIGCHLD, so all
	// non-group-leader threads have no termination signal and are therefore
	// "clone tasks".
	//
	// Accordingly, matchesTask treats a task as a non-clone task only if it
	// is the leader of its thread group and that thread group's termination
	// signal is SIGCHLD. Tracees are matched regardless of NonCloneTasks and
	// CloneTasks.

	// If NonCloneTasks is true, events from non-clone tasks are eligible to be
	// waited for.
	NonCloneTasks bool

	// If CloneTasks is true, events from clone tasks are eligible to be waited
	// for.
	CloneTasks bool

	// If SiblingChildren is true, events from children tasks of any task
	// in the thread group of the waiter are eligible to be waited for.
	// Otherwise only the children and tracees of the waiter itself are
	// considered.
	SiblingChildren bool

	// Events is a bitwise combination of the events defined above that specify
	// what events are of interest to the call to Wait.
	Events waiter.EventMask

	// If ConsumeEvent is true, the Wait should consume the event such that it
	// cannot be returned by a future Wait. Note that if a task exit is
	// consumed in this way, in most cases the task will be reaped.
	ConsumeEvent bool

	// If BlockInterruptErr is not nil, Wait will block until either an event
	// is available or there are no tasks that could produce a waitable event;
	// if that blocking is interrupted, Wait returns BlockInterruptErr. If
	// BlockInterruptErr is nil, Wait will not block.
	BlockInterruptErr error
}
   848  
   849  // Preconditions: The TaskSet mutex must be locked (for reading or writing).
   850  func (o *WaitOptions) matchesTask(t *Task, pidns *PIDNamespace, tracee bool) bool {
   851  	if o.SpecificTID != 0 && o.SpecificTID != pidns.tids[t] {
   852  		return false
   853  	}
   854  	if o.SpecificPGID != 0 && o.SpecificPGID != pidns.pgids[t.tg.processGroup] {
   855  		return false
   856  	}
   857  	// Tracees are always eligible.
   858  	if tracee {
   859  		return true
   860  	}
   861  	if t == t.tg.leader && t.tg.terminationSignal == linux.SIGCHLD {
   862  		return o.NonCloneTasks
   863  	}
   864  	return o.CloneTasks
   865  }
   866  
// ErrNoWaitableEvent is returned by non-blocking Task.Waits (e.g.
// waitpid(WNOHANG)) that find no waitable events, but determine that waitable
// events may exist in the future. (In contrast, if a non-blocking or blocking
// Wait determines that there are no tasks that can produce a waitable event,
// Task.Wait returns ECHILD.)
//
// A blocking Task.Wait treats this error as the signal to block and retry, so
// it does not return it from a completed attempt.
var ErrNoWaitableEvent = errors.New("non-blocking Wait found eligible threads but no waitable events")
   873  
// WaitResult contains information about a waited-for event.
type WaitResult struct {
	// Task is the task that reported the event.
	Task *Task

	// TID is the thread ID of Task in the PID namespace of the task that
	// called Wait (that is, the method receiver of the call to Task.Wait). TID
	// is provided because consuming exit waits cause the thread ID to be
	// deallocated.
	TID ThreadID

	// UID is the real UID of Task in the user namespace of the task that
	// called Wait.
	UID auth.UID

	// Event is exactly one of the events defined above.
	Event waiter.EventMask

	// Status is the numeric status associated with the event. Its encoding
	// depends on Event:
	//
	//   - EventExit: the value of ExitStatus.Status() for the task (or for
	//     its thread group, when a consuming wait finds the group exiting).
	//   - EventChildGroupStop: (stop signal & 0xff) << 8 | 0x7f.
	//   - EventTraceeStop: ptrace code << 8 | 0x7f.
	//   - EventGroupContinue: 0xffff.
	Status uint32
}
   895  
   896  // Wait waits for an event from a thread group that is a child of t's thread
   897  // group, or a task in such a thread group, or a task that is ptraced by t,
   898  // subject to the options specified in opts.
   899  func (t *Task) Wait(opts *WaitOptions) (*WaitResult, error) {
   900  	if opts.BlockInterruptErr == nil {
   901  		return t.waitOnce(opts)
   902  	}
   903  	w, ch := waiter.NewChannelEntry(nil)
   904  	t.tg.eventQueue.EventRegister(&w, opts.Events)
   905  	defer t.tg.eventQueue.EventUnregister(&w)
   906  	for {
   907  		wr, err := t.waitOnce(opts)
   908  		if err != ErrNoWaitableEvent {
   909  			// This includes err == nil.
   910  			return wr, err
   911  		}
   912  		if err := t.Block(ch); err != nil {
   913  			return wr, syserror.ConvertIntr(err, opts.BlockInterruptErr)
   914  		}
   915  	}
   916  }
   917  
   918  func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) {
   919  	anyWaitableTasks := false
   920  
   921  	t.tg.pidns.owner.mu.Lock()
   922  	defer t.tg.pidns.owner.mu.Unlock()
   923  
   924  	if opts.SiblingChildren {
   925  		// We can wait on the children and tracees of any task in the
   926  		// same thread group.
   927  		for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() {
   928  			wr, any := t.waitParentLocked(opts, parent)
   929  			if wr != nil {
   930  				return wr, nil
   931  			}
   932  			anyWaitableTasks = anyWaitableTasks || any
   933  		}
   934  	} else {
   935  		// We can only wait on this task.
   936  		var wr *WaitResult
   937  		wr, anyWaitableTasks = t.waitParentLocked(opts, t)
   938  		if wr != nil {
   939  			return wr, nil
   940  		}
   941  	}
   942  
   943  	if anyWaitableTasks {
   944  		return nil, ErrNoWaitableEvent
   945  	}
   946  	return nil, linuxerr.ECHILD
   947  }
   948  
   949  // Preconditions: The TaskSet mutex must be locked for writing.
   950  func (t *Task) waitParentLocked(opts *WaitOptions, parent *Task) (*WaitResult, bool) {
   951  	anyWaitableTasks := false
   952  
   953  	for child := range parent.children {
   954  		if !opts.matchesTask(child, parent.tg.pidns, false) {
   955  			continue
   956  		}
   957  		// Non-leaders don't notify parents on exit and aren't eligible to
   958  		// be waited on.
   959  		if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked {
   960  			anyWaitableTasks = true
   961  			if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil {
   962  				return wr, anyWaitableTasks
   963  			}
   964  		}
   965  		// Check for group stops and continues. Tasks that have passed
   966  		// TaskExitInitiated can no longer participate in group stops.
   967  		if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 {
   968  			continue
   969  		}
   970  		if child.exitState >= TaskExitInitiated {
   971  			continue
   972  		}
   973  		// If the waiter is in the same thread group as the task's
   974  		// tracer, do not report its group stops; they will be reported
   975  		// as ptrace stops instead. This also skips checking for group
   976  		// continues, but they'll be checked for when scanning tracees
   977  		// below. (Per kernel/exit.c:wait_consider_task(): "If a
   978  		// ptracer wants to distinguish the two events for its own
   979  		// children, it should create a separate process which takes
   980  		// the role of real parent.")
   981  		if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg {
   982  			continue
   983  		}
   984  		anyWaitableTasks = true
   985  		if opts.Events&EventChildGroupStop != 0 {
   986  			if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil {
   987  				return wr, anyWaitableTasks
   988  			}
   989  		}
   990  		if opts.Events&EventGroupContinue != 0 {
   991  			if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil {
   992  				return wr, anyWaitableTasks
   993  			}
   994  		}
   995  	}
   996  	for tracee := range parent.ptraceTracees {
   997  		if !opts.matchesTask(tracee, parent.tg.pidns, true) {
   998  			continue
   999  		}
  1000  		// Non-leaders do notify tracers on exit.
  1001  		if opts.Events&EventExit != 0 && !tracee.exitTracerAcked {
  1002  			anyWaitableTasks = true
  1003  			if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil {
  1004  				return wr, anyWaitableTasks
  1005  			}
  1006  		}
  1007  		if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 {
  1008  			continue
  1009  		}
  1010  		if tracee.exitState >= TaskExitInitiated {
  1011  			continue
  1012  		}
  1013  		anyWaitableTasks = true
  1014  		if opts.Events&EventTraceeStop != 0 {
  1015  			if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil {
  1016  				return wr, anyWaitableTasks
  1017  			}
  1018  		}
  1019  		if opts.Events&EventGroupContinue != 0 {
  1020  			if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil {
  1021  				return wr, anyWaitableTasks
  1022  			}
  1023  		}
  1024  	}
  1025  
  1026  	return nil, anyWaitableTasks
  1027  }
  1028  
  1029  // Preconditions: The TaskSet mutex must be locked for writing.
  1030  func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtracer bool) *WaitResult {
  1031  	if asPtracer && !target.exitTracerNotified {
  1032  		return nil
  1033  	}
  1034  	if !asPtracer && !target.exitParentNotified {
  1035  		return nil
  1036  	}
  1037  	// Zombied thread group leaders are never waitable until their thread group
  1038  	// is otherwise empty. Usually this is caught by the
  1039  	// target.exitParentNotified check above, but if t is both (in the thread
  1040  	// group of) target's tracer and parent, asPtracer may be true.
  1041  	if target == target.tg.leader && target.tg.tasksCount != 1 {
  1042  		return nil
  1043  	}
  1044  	pid := t.tg.pidns.tids[target]
  1045  	uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()
  1046  	status := target.exitStatus.Status()
  1047  	if !opts.ConsumeEvent {
  1048  		return &WaitResult{
  1049  			Task:   target,
  1050  			TID:    pid,
  1051  			UID:    uid,
  1052  			Event:  EventExit,
  1053  			Status: status,
  1054  		}
  1055  	}
  1056  	// Surprisingly, the exit status reported by a non-consuming wait can
  1057  	// differ from that reported by a consuming wait; the latter will return
  1058  	// the group exit code if one is available.
  1059  	if target.tg.exiting {
  1060  		status = target.tg.exitStatus.Status()
  1061  	}
  1062  	// t may be (in the thread group of) target's parent, tracer, or both. We
  1063  	// don't need to check for !exitTracerAcked because tracees are detached
  1064  	// here, and we don't need to check for !exitParentAcked because zombies
  1065  	// will be reaped here.
  1066  	if tracer := target.Tracer(); tracer != nil && tracer.tg == t.tg && target.exitTracerNotified {
  1067  		target.exitTracerAcked = true
  1068  		target.ptraceTracer.Store((*Task)(nil))
  1069  		delete(t.ptraceTracees, target)
  1070  	}
  1071  	if target.parent != nil && target.parent.tg == t.tg && target.exitParentNotified {
  1072  		target.exitParentAcked = true
  1073  		if target == target.tg.leader {
  1074  			// target.tg.exitedCPUStats doesn't include target.CPUStats() yet,
  1075  			// and won't until after target.exitNotifyLocked() (maybe). Include
  1076  			// target.CPUStats() explicitly. This is consistent with Linux,
  1077  			// which accounts an exited task's cputime to its thread group in
  1078  			// kernel/exit.c:release_task() => __exit_signal(), and uses
  1079  			// thread_group_cputime_adjusted() in wait_task_zombie().
  1080  			t.tg.childCPUStats.Accumulate(target.CPUStats())
  1081  			t.tg.childCPUStats.Accumulate(target.tg.exitedCPUStats)
  1082  			t.tg.childCPUStats.Accumulate(target.tg.childCPUStats)
  1083  			// Update t's child max resident set size. The size will be the maximum
  1084  			// of this thread's size and all its childrens' sizes.
  1085  			if t.tg.childMaxRSS < target.tg.maxRSS {
  1086  				t.tg.childMaxRSS = target.tg.maxRSS
  1087  			}
  1088  			if t.tg.childMaxRSS < target.tg.childMaxRSS {
  1089  				t.tg.childMaxRSS = target.tg.childMaxRSS
  1090  			}
  1091  		}
  1092  	}
  1093  	target.exitNotifyLocked(false)
  1094  	return &WaitResult{
  1095  		Task:   target,
  1096  		TID:    pid,
  1097  		UID:    uid,
  1098  		Event:  EventExit,
  1099  		Status: status,
  1100  	}
  1101  }
  1102  
  1103  // updateRSSLocked updates t.tg.maxRSS.
  1104  //
  1105  // Preconditions: The TaskSet mutex must be locked for writing.
  1106  func (t *Task) updateRSSLocked() {
  1107  	if mmMaxRSS := t.MemoryManager().MaxResidentSetSize(); t.tg.maxRSS < mmMaxRSS {
  1108  		t.tg.maxRSS = mmMaxRSS
  1109  	}
  1110  }
  1111  
  1112  // Preconditions: The TaskSet mutex must be locked for writing.
  1113  func (t *Task) waitCollectChildGroupStopLocked(target *Task, opts *WaitOptions) *WaitResult {
  1114  	target.tg.signalHandlers.mu.Lock()
  1115  	defer target.tg.signalHandlers.mu.Unlock()
  1116  	if !target.tg.groupStopWaitable {
  1117  		return nil
  1118  	}
  1119  	pid := t.tg.pidns.tids[target]
  1120  	uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()
  1121  	sig := target.tg.groupStopSignal
  1122  	if opts.ConsumeEvent {
  1123  		target.tg.groupStopWaitable = false
  1124  	}
  1125  	return &WaitResult{
  1126  		Task:  target,
  1127  		TID:   pid,
  1128  		UID:   uid,
  1129  		Event: EventChildGroupStop,
  1130  		// There is no name for these status constants.
  1131  		Status: (uint32(sig)&0xff)<<8 | 0x7f,
  1132  	}
  1133  }
  1134  
  1135  // Preconditions: The TaskSet mutex must be locked for writing.
  1136  func (t *Task) waitCollectGroupContinueLocked(target *Task, opts *WaitOptions) *WaitResult {
  1137  	target.tg.signalHandlers.mu.Lock()
  1138  	defer target.tg.signalHandlers.mu.Unlock()
  1139  	if !target.tg.groupContWaitable {
  1140  		return nil
  1141  	}
  1142  	pid := t.tg.pidns.tids[target]
  1143  	uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()
  1144  	if opts.ConsumeEvent {
  1145  		target.tg.groupContWaitable = false
  1146  	}
  1147  	return &WaitResult{
  1148  		Task:   target,
  1149  		TID:    pid,
  1150  		UID:    uid,
  1151  		Event:  EventGroupContinue,
  1152  		Status: 0xffff,
  1153  	}
  1154  }
  1155  
  1156  // Preconditions: The TaskSet mutex must be locked for writing.
  1157  func (t *Task) waitCollectTraceeStopLocked(target *Task, opts *WaitOptions) *WaitResult {
  1158  	target.tg.signalHandlers.mu.Lock()
  1159  	defer target.tg.signalHandlers.mu.Unlock()
  1160  	if target.stop == nil {
  1161  		return nil
  1162  	}
  1163  	if _, ok := target.stop.(*ptraceStop); !ok {
  1164  		return nil
  1165  	}
  1166  	if target.ptraceCode == 0 {
  1167  		return nil
  1168  	}
  1169  	pid := t.tg.pidns.tids[target]
  1170  	uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()
  1171  	code := target.ptraceCode
  1172  	if opts.ConsumeEvent {
  1173  		target.ptraceCode = 0
  1174  	}
  1175  	return &WaitResult{
  1176  		Task:   target,
  1177  		TID:    pid,
  1178  		UID:    uid,
  1179  		Event:  EventTraceeStop,
  1180  		Status: uint32(code)<<8 | 0x7f,
  1181  	}
  1182  }
  1183  
  1184  // ExitState returns t's current progress through the exit path.
  1185  func (t *Task) ExitState() TaskExitState {
  1186  	t.tg.pidns.owner.mu.RLock()
  1187  	defer t.tg.pidns.owner.mu.RUnlock()
  1188  	return t.exitState
  1189  }
  1190  
  1191  // ParentDeathSignal returns t's parent death signal.
  1192  func (t *Task) ParentDeathSignal() linux.Signal {
  1193  	t.mu.Lock()
  1194  	defer t.mu.Unlock()
  1195  	return t.parentDeathSignal
  1196  }
  1197  
  1198  // SetParentDeathSignal sets t's parent death signal.
  1199  func (t *Task) SetParentDeathSignal(sig linux.Signal) {
  1200  	t.mu.Lock()
  1201  	defer t.mu.Unlock()
  1202  	t.parentDeathSignal = sig
  1203  }