github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/thread_group.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	goContext "context"
    19  	"sync/atomic"
    20  
    21  	"github.com/metacubex/gvisor/pkg/abi/linux"
    22  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    23  	"github.com/metacubex/gvisor/pkg/context"
    24  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    25  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    26  	ktime "github.com/metacubex/gvisor/pkg/sentry/kernel/time"
    27  	"github.com/metacubex/gvisor/pkg/sentry/limits"
    28  	"github.com/metacubex/gvisor/pkg/sentry/usage"
    29  	"github.com/metacubex/gvisor/pkg/sync"
    30  )
    31  
// A ThreadGroup is a logical grouping of tasks that has widespread
// significance to other kernel features (e.g. signal handling). ("Thread
// groups" are usually called "processes" in userspace documentation.)
//
// ThreadGroup is a superset of Linux's struct signal_struct.
//
// +stateify savable
type ThreadGroup struct {
	threadGroupNode

	// signalHandlers is the set of signal handlers used by every task in this
	// thread group. (signalHandlers may also be shared with other thread
	// groups.)
	//
	// signalHandlers.mu (hereafter "the signal mutex") protects state related
	// to signal handling, as well as state that usually needs to be atomic
	// with signal handling, for all ThreadGroups and Tasks using
	// signalHandlers. (This is analogous to Linux's use of struct
	// sighand_struct::siglock.)
	//
	// The signalHandlers pointer can only be mutated during an execve
	// (Task.finishExec). Consequently, when it's possible for a task in the
	// thread group to be completing an execve, signalHandlers is protected by
	// the owning TaskSet.mu. Otherwise, it is possible to read the
	// signalHandlers pointer without synchronization. In particular,
	// completing an execve requires that all other tasks in the thread group
	// have exited, so task goroutines do not need the owning TaskSet.mu to
	// read the signalHandlers pointer of their thread groups.
	signalHandlers *SignalHandlers

	// pendingSignals is the set of pending signals that may be handled by any
	// task in this thread group.
	//
	// pendingSignals is protected by the signal mutex.
	pendingSignals pendingSignals

	// If groupStopDequeued is true, a task in the thread group has dequeued a
	// stop signal, but has not yet initiated the group stop.
	//
	// groupStopDequeued is analogous to Linux's JOBCTL_STOP_DEQUEUED.
	//
	// groupStopDequeued is protected by the signal mutex.
	groupStopDequeued bool

	// groupStopSignal is the signal that caused a group stop to be initiated.
	//
	// groupStopSignal is protected by the signal mutex.
	groupStopSignal linux.Signal

	// groupStopPendingCount is the number of active tasks in the thread group
	// for which Task.groupStopPending is set.
	//
	// groupStopPendingCount is analogous to Linux's
	// signal_struct::group_stop_count.
	//
	// groupStopPendingCount is protected by the signal mutex.
	groupStopPendingCount int

	// If groupStopComplete is true, groupStopPendingCount transitioned from
	// non-zero to zero without an intervening SIGCONT.
	//
	// groupStopComplete is analogous to Linux's SIGNAL_STOP_STOPPED.
	//
	// groupStopComplete is protected by the signal mutex.
	groupStopComplete bool

	// If groupStopWaitable is true, the thread group is indicating a waitable
	// group stop event (as defined by EventChildGroupStop).
	//
	// Linux represents the analogous state as SIGNAL_STOP_STOPPED being set
	// and group_exit_code being non-zero.
	//
	// groupStopWaitable is protected by the signal mutex.
	groupStopWaitable bool

	// If groupContNotify is true, then a SIGCONT has recently ended a group
	// stop on this thread group, and the first task to observe it should
	// notify its parent. groupContInterrupted is true iff SIGCONT ended an
	// incomplete group stop. If groupContNotify is false, groupContInterrupted is
	// meaningless.
	//
	// Analogues in Linux:
	//
	//	- groupContNotify && groupContInterrupted is represented by
	//		SIGNAL_CLD_STOPPED.
	//
	//	- groupContNotify && !groupContInterrupted is represented by
	//		SIGNAL_CLD_CONTINUED.
	//
	//	- !groupContNotify is represented by neither flag being set.
	//
	// groupContNotify and groupContInterrupted are protected by the signal
	// mutex.
	groupContNotify      bool
	groupContInterrupted bool

	// If groupContWaitable is true, the thread group is indicating a waitable
	// continue event (as defined by EventGroupContinue).
	//
	// groupContWaitable is analogous to Linux's SIGNAL_STOP_CONTINUED.
	//
	// groupContWaitable is protected by the signal mutex.
	groupContWaitable bool

	// exiting is true if all tasks in the ThreadGroup should exit. exiting is
	// analogous to Linux's SIGNAL_GROUP_EXIT.
	//
	// exiting is protected by the signal mutex. exiting can only transition
	// from false to true.
	exiting bool

	// exitStatus is the thread group's exit status.
	//
	// While exiting is false, exitStatus is protected by the signal mutex.
	// When exiting becomes true, exitStatus becomes immutable.
	exitStatus linux.WaitStatus

	// terminationSignal is the signal that this thread group's leader will
	// send to its parent when it exits.
	//
	// terminationSignal is protected by the TaskSet mutex.
	terminationSignal linux.Signal

	// liveGoroutines is the number of non-exited task goroutines in the thread
	// group.
	//
	// liveGoroutines is not saved; it is reset as task goroutines are
	// restarted by Task.Start.
	liveGoroutines sync.WaitGroup `state:"nosave"`

	// timerMu protects timers and nextTimerID (see below).
	timerMu threadGroupTimerMutex `state:"nosave"`

	// itimerRealTimer implements ITIMER_REAL for the thread group.
	itimerRealTimer *ktime.Timer

	// itimerVirtSetting is the ITIMER_VIRTUAL setting for the thread group.
	//
	// itimerVirtSetting is protected by the signal mutex.
	itimerVirtSetting ktime.Setting

	// itimerProfSetting is the ITIMER_PROF setting for the thread group.
	//
	// itimerProfSetting is protected by the signal mutex.
	itimerProfSetting ktime.Setting

	// rlimitCPUSoftSetting is the setting for RLIMIT_CPU soft limit
	// notifications for the thread group.
	//
	// rlimitCPUSoftSetting is protected by the signal mutex.
	rlimitCPUSoftSetting ktime.Setting

	// cpuTimersEnabled is non-zero if itimerVirtSetting.Enabled is true,
	// itimerProfSetting.Enabled is true, rlimitCPUSoftSetting.Enabled is true,
	// or limits.Get(CPU) is finite.
	//
	// cpuTimersEnabled is protected by the signal mutex.
	cpuTimersEnabled atomicbitops.Uint32

	// timers is the thread group's POSIX interval timers. nextTimerID is the
	// TimerID at which allocation should begin searching for an unused ID.
	//
	// timers and nextTimerID are protected by timerMu.
	timers      map[linux.TimerID]*IntervalTimer
	nextTimerID linux.TimerID

	// exitedCPUStats is the CPU usage for all exited tasks in the thread
	// group. exitedCPUStats is protected by the TaskSet mutex.
	exitedCPUStats usage.CPUStats

	// childCPUStats is the CPU usage of all joined descendants of this thread
	// group. childCPUStats is protected by the TaskSet mutex.
	childCPUStats usage.CPUStats

	// ioUsage is the I/O usage for all exited tasks in the thread group.
	// The ioUsage pointer is immutable.
	ioUsage *usage.IO

	// maxRSS is the historical maximum resident set size of the thread group,
	// updated when:
	//
	//	- A task in the thread group exits, since after all tasks have
	//		exited the MemoryManager is no longer reachable.
	//
	//	- The thread group completes an execve, since this changes
	//		MemoryManagers.
	//
	// maxRSS is protected by the TaskSet mutex.
	maxRSS uint64

	// childMaxRSS is the maximum resident set size in bytes of all joined
	// descendants of this thread group.
	//
	// childMaxRSS is protected by the TaskSet mutex.
	childMaxRSS uint64

	// Resource limits for this ThreadGroup. The limits pointer is immutable.
	limits *limits.LimitSet

	// processGroup is the processGroup for this thread group.
	//
	// processGroup is protected by the TaskSet mutex.
	processGroup *ProcessGroup

	// execed indicates an exec has occurred since creation. This will be
	// set by finishExec, and new ThreadGroups will have this field cleared.
	// When execed is set, the processGroup may no longer be changed.
	//
	// execed is protected by the TaskSet mutex.
	execed bool

	// oldRSeqCritical is the thread group's old rseq critical region.
	oldRSeqCritical atomic.Pointer[OldRSeqCriticalRegion] `state:".(*OldRSeqCriticalRegion)"`

	// tty is the thread group's controlling terminal. If nil, there is no
	// controlling terminal.
	//
	// tty is protected by the signal mutex.
	tty *TTY

	// oomScoreAdj is the thread group's OOM score adjustment. This is
	// currently not used but is maintained for consistency.
	// TODO(gvisor.dev/issue/1967)
	oomScoreAdj atomicbitops.Int32

	// isChildSubreaper and hasChildSubreaper correspond to Linux's
	// signal_struct::is_child_subreaper and has_child_subreaper.
	//
	// Both fields are protected by the TaskSet mutex.
	//
	// Quoting from signal.h:
	// "PR_SET_CHILD_SUBREAPER marks a process, like a service manager, to
	// re-parent orphan (double-forking) child processes to this process
	// instead of 'init'. The service manager is able to receive SIGCHLD
	// signals and is able to investigate the process until it calls
	// wait(). All children of this process will inherit a flag if they
	// should look for a child_subreaper process at exit"
	isChildSubreaper  bool
	hasChildSubreaper bool
}
   270  
   271  // NewThreadGroup returns a new, empty thread group in PID namespace pidns. The
   272  // thread group leader will send its parent terminationSignal when it exits.
   273  // The new thread group isn't visible to the system until a task has been
   274  // created inside of it by a successful call to TaskSet.NewTask.
   275  func (k *Kernel) NewThreadGroup(pidns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet) *ThreadGroup {
   276  	tg := &ThreadGroup{
   277  		threadGroupNode: threadGroupNode{
   278  			pidns: pidns,
   279  		},
   280  		signalHandlers:    sh,
   281  		terminationSignal: terminationSignal,
   282  		ioUsage:           &usage.IO{},
   283  		limits:            limits,
   284  	}
   285  	tg.itimerRealTimer = ktime.NewTimer(k.timekeeper.monotonicClock, &itimerRealListener{tg: tg})
   286  	tg.timers = make(map[linux.TimerID]*IntervalTimer)
   287  	tg.oldRSeqCritical.Store(&OldRSeqCriticalRegion{})
   288  	return tg
   289  }
   290  
   291  // saveOldRSeqCritical is invoked by stateify.
   292  func (tg *ThreadGroup) saveOldRSeqCritical() *OldRSeqCriticalRegion {
   293  	return tg.oldRSeqCritical.Load()
   294  }
   295  
// loadOldRSeqCritical is invoked by stateify. It restores the saved old rseq
// critical region pointer; the context parameter is unused.
func (tg *ThreadGroup) loadOldRSeqCritical(_ goContext.Context, r *OldRSeqCriticalRegion) {
	tg.oldRSeqCritical.Store(r)
}
   300  
   301  // SignalHandlers returns the signal handlers used by tg.
   302  //
   303  // Preconditions: The caller must provide the synchronization required to read
   304  // tg.signalHandlers, as described in the field's comment.
   305  func (tg *ThreadGroup) SignalHandlers() *SignalHandlers {
   306  	return tg.signalHandlers
   307  }
   308  
   309  // Limits returns tg's limits.
   310  func (tg *ThreadGroup) Limits() *limits.LimitSet {
   311  	return tg.limits
   312  }
   313  
// Release releases the thread group's resources: it destroys the ITIMER_REAL
// timer and all POSIX interval timers, and disassociates tg from its
// controlling terminal (if any).
func (tg *ThreadGroup) Release(ctx context.Context) {
	// Timers must be destroyed without holding the TaskSet or signal mutexes
	// since timers send signals with Timer.mu locked.
	tg.itimerRealTimer.Destroy()
	var its []*IntervalTimer
	tg.pidns.owner.mu.Lock()
	tg.signalHandlers.mu.Lock()
	// Collect the interval timers under the locks, but defer their
	// destruction until both mutexes are released (see above).
	for _, it := range tg.timers {
		its = append(its, it)
	}
	tg.timers = make(map[linux.TimerID]*IntervalTimer) // nil maps can't be saved
	// Disassociate from the tty if we have one.
	if tg.tty != nil {
		tg.tty.mu.Lock()
		// Only clear the terminal's controlling thread group if it is
		// still tg; another thread group may have taken it over.
		if tg.tty.tg == tg {
			tg.tty.tg = nil
		}
		tg.tty.mu.Unlock()
		tg.tty = nil
	}
	tg.signalHandlers.mu.Unlock()
	tg.pidns.owner.mu.Unlock()
	// Now that no locks are held, destroy the collected timers.
	for _, it := range its {
		it.DestroyTimer()
	}
}
   341  
   342  // forEachChildThreadGroupLocked indicates over all child ThreadGroups.
   343  //
   344  // Precondition: TaskSet.mu must be held.
   345  func (tg *ThreadGroup) forEachChildThreadGroupLocked(fn func(*ThreadGroup)) {
   346  	tg.walkDescendantThreadGroupsLocked(func(child *ThreadGroup) bool {
   347  		fn(child)
   348  		// Don't recurse below the immediate children.
   349  		return false
   350  	})
   351  }
   352  
   353  // walkDescendantThreadGroupsLocked recursively walks all descendent
   354  // ThreadGroups and executes the visitor function. If visitor returns false for
   355  // a given ThreadGroup, then that ThreadGroups descendants are excluded from
   356  // further iteration.
   357  //
   358  // This corresponds to Linux's walk_process_tree.
   359  //
   360  // Precondition: TaskSet.mu must be held.
   361  func (tg *ThreadGroup) walkDescendantThreadGroupsLocked(visitor func(*ThreadGroup) bool) {
   362  	for t := tg.tasks.Front(); t != nil; t = t.Next() {
   363  		for child := range t.children {
   364  			if child == child.tg.leader {
   365  				if !visitor(child.tg) {
   366  					// Don't recurse below child.
   367  					continue
   368  				}
   369  				child.tg.walkDescendantThreadGroupsLocked(visitor)
   370  			}
   371  		}
   372  	}
   373  }
   374  
// SetControllingTTY sets tty as the controlling terminal of tg.
//
// steal requests stealing the terminal from another session (TIOCSCTTY with
// arg == 1), which requires CAP_SYS_ADMIN in the root user namespace.
// isReadable indicates whether the caller's tty file descriptor is readable.
//
// Errors: EINVAL if tg is not a session leader or already has a different
// controlling terminal; EPERM if the terminal belongs to another session and
// cannot be stolen, or if the fd is not readable and the caller lacks
// CAP_SYS_ADMIN.
func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) error {
	tty.mu.Lock()
	defer tty.mu.Unlock()

	// We might be asked to set the controlling terminal of multiple
	// processes, so we lock both the TaskSet and SignalHandlers.
	tg.pidns.owner.mu.Lock()
	defer tg.pidns.owner.mu.Unlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()

	// "The calling process must be a session leader and not have a
	// controlling terminal already." - tty_ioctl(4)
	if tg.processGroup.session.leader != tg {
		return linuxerr.EINVAL
	}
	if tg.tty == tty {
		// Setting the same controlling terminal again is a no-op.
		return nil
	} else if tg.tty != nil {
		return linuxerr.EINVAL
	}

	creds := auth.CredentialsFromContext(tg.leader)
	hasAdmin := creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, creds.UserNamespace.Root())

	// "If this terminal is already the controlling terminal of a different
	// session group, then the ioctl fails with EPERM, unless the caller
	// has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
	// terminal is stolen, and all processes that had it as controlling
	// terminal lose it." - tty_ioctl(4)
	if tty.tg != nil && tg.processGroup.session != tty.tg.processGroup.session {
		// Stealing requires CAP_SYS_ADMIN in the root user namespace.
		if !hasAdmin || !steal {
			return linuxerr.EPERM
		}
		// Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
		for othertg := range tg.pidns.owner.Root.tgids {
			// This won't deadlock by locking tg.signalHandlers
			// because at this point:
			//	- We only lock signalHandlers if it's in the same
			//		session as the tty's controlling thread group.
			//	- We know that the calling thread group is not in
			//		the same session as the tty's controlling thread
			//		group.
			if othertg.processGroup.session == tty.tg.processGroup.session {
				othertg.signalHandlers.mu.NestedLock(signalHandlersLockTg)
				othertg.tty = nil
				othertg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg)
			}
		}
	}

	if !isReadable && !hasAdmin {
		return linuxerr.EPERM
	}

	// Set the controlling terminal and foreground process group.
	tg.tty = tty
	tg.processGroup.session.foreground = tg.processGroup
	// Set this as the controlling process of the terminal.
	tty.tg = tg

	return nil
}
   440  
// ReleaseControllingTTY gives up tty as the controlling tty of tg.
//
// Returns ENOTTY if tty is not tg's controlling terminal. If tg is the
// session leader, every thread group in the session loses its controlling
// terminal, SIGHUP and SIGCONT are sent to the foreground process group, and
// the last signal-delivery error (if any) is returned.
func (tg *ThreadGroup) ReleaseControllingTTY(tty *TTY) error {
	tty.mu.Lock()
	defer tty.mu.Unlock()

	// We might be asked to set the controlling terminal of multiple
	// processes, so we lock both the TaskSet and SignalHandlers.
	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()

	// Just below, we may re-lock signalHandlers in order to send signals.
	// Thus we can't defer Unlock here.
	tg.signalHandlers.mu.Lock()

	if tg.tty == nil || tg.tty != tty {
		tg.signalHandlers.mu.Unlock()
		return linuxerr.ENOTTY
	}

	// "If the process was session leader, then send SIGHUP and SIGCONT to
	// the foreground process group and all processes in the current
	// session lose their controlling terminal." - tty_ioctl(4)
	// Remove tty as the controlling tty for each process in the session,
	// then send them SIGHUP and SIGCONT.

	// If we're not the session leader, we don't have to do much.
	if tty.tg != tg {
		tg.tty = nil
		tg.signalHandlers.mu.Unlock()
		return nil
	}

	// Drop the signal mutex before iterating over other thread groups;
	// each of their signal mutexes is locked individually below.
	tg.signalHandlers.mu.Unlock()

	// We're the session leader. SIGHUP and SIGCONT the foreground process
	// group and remove all controlling terminals in the session.
	var lastErr error
	for othertg := range tg.pidns.owner.Root.tgids {
		if othertg.processGroup.session == tg.processGroup.session {
			othertg.signalHandlers.mu.Lock()
			othertg.tty = nil
			if othertg.processGroup == tg.processGroup.session.foreground {
				if err := othertg.leader.sendSignalLocked(&linux.SignalInfo{Signo: int32(linux.SIGHUP)}, true /* group */); err != nil {
					lastErr = err
				}
				if err := othertg.leader.sendSignalLocked(&linux.SignalInfo{Signo: int32(linux.SIGCONT)}, true /* group */); err != nil {
					lastErr = err
				}
			}
			othertg.signalHandlers.mu.Unlock()
		}
	}

	return lastErr
}
   496  
// ForegroundProcessGroupID returns the foreground process group ID of the
// thread group.
//
// Returns ENOTTY if tty is not tg's controlling terminal.
func (tg *ThreadGroup) ForegroundProcessGroupID(tty *TTY) (ProcessGroupID, error) {
	tty.mu.Lock()
	defer tty.mu.Unlock()

	tg.pidns.owner.mu.Lock()
	defer tg.pidns.owner.mu.Unlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()

	// fd must refer to the controlling terminal of the calling process.
	// See tcgetpgrp(3)
	if tg.tty != tty {
		return 0, linuxerr.ENOTTY
	}

	// NOTE(review): this assumes session.foreground is non-nil once tty is
	// the controlling terminal — confirm SetControllingTTY always sets it.
	return tg.processGroup.session.foreground.id, nil
}
   516  
// SetForegroundProcessGroupID sets the foreground process group of tty to
// pgid.
//
// Errors: ENOTTY if tty is not tg's controlling terminal; EINVAL if pgid is
// negative; ESRCH if no process group with ID pgid exists in tg's PID
// namespace; EPERM if that process group belongs to a different session;
// ERESTARTSYS if tg is in a background process group and SIGTTOU is neither
// ignored nor blocked (SIGTTOU is sent and the syscall should be restarted).
func (tg *ThreadGroup) SetForegroundProcessGroupID(tty *TTY, pgid ProcessGroupID) error {
	tty.mu.Lock()
	defer tty.mu.Unlock()

	tg.pidns.owner.mu.Lock()
	defer tg.pidns.owner.mu.Unlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()

	// tty must be the controlling terminal.
	if tg.tty != tty {
		return linuxerr.ENOTTY
	}

	// pgid must be positive.
	if pgid < 0 {
		return linuxerr.EINVAL
	}

	// pg must not be empty. Empty process groups are removed from their
	// pid namespaces.
	pg, ok := tg.pidns.processGroups[pgid]
	if !ok {
		return linuxerr.ESRCH
	}

	// pg must be part of this process's session.
	if tg.processGroup.session != pg.session {
		return linuxerr.EPERM
	}

	signalAction := tg.signalHandlers.actions[linux.SIGTTOU]
	// If the calling process is a member of a background group, a SIGTTOU
	// signal is sent to all members of this background process group.
	// We also need to check whether it is ignoring or blocking SIGTTOU.
	ignored := signalAction.Handler == linux.SIG_IGN
	blocked := (linux.SignalSet(tg.leader.signalMask.RacyLoad()) & linux.SignalSetOf(linux.SIGTTOU)) != 0
	if tg.processGroup.id != tg.processGroup.session.foreground.id && !ignored && !blocked {
		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGTTOU), true)
		return linuxerr.ERESTARTSYS
	}

	tg.processGroup.session.foreground = pg
	return nil
}
   564  
   565  // SetChildSubreaper marks this ThreadGroup sets the isChildSubreaper field on
   566  // this ThreadGroup, and marks all child ThreadGroups as having a subreaper.
   567  // Recursion stops if we find another subreaper process, which is either a
   568  // ThreadGroup with isChildSubreaper bit set, or a ThreadGroup with PID=1
   569  // inside a PID namespace.
   570  func (tg *ThreadGroup) SetChildSubreaper(isSubreaper bool) {
   571  	ts := tg.TaskSet()
   572  	ts.mu.Lock()
   573  	defer ts.mu.Unlock()
   574  	tg.isChildSubreaper = isSubreaper
   575  	tg.walkDescendantThreadGroupsLocked(func(child *ThreadGroup) bool {
   576  		// Is this child PID 1 in its PID namespace, or already a
   577  		// subreaper?
   578  		if child.isInitInLocked(child.PIDNamespace()) || child.isChildSubreaper {
   579  			// Don't set hasChildSubreaper, and don't recurse.
   580  			return false
   581  		}
   582  		child.hasChildSubreaper = isSubreaper
   583  		return true // Recurse.
   584  	})
   585  }
   586  
   587  // IsChildSubreaper returns whether this ThreadGroup is a child subreaper.
   588  func (tg *ThreadGroup) IsChildSubreaper() bool {
   589  	ts := tg.TaskSet()
   590  	ts.mu.RLock()
   591  	defer ts.mu.RUnlock()
   592  	return tg.isChildSubreaper
   593  }
   594  
   595  // IsInitIn returns whether this ThreadGroup has TID 1 int the given
   596  // PIDNamespace.
   597  func (tg *ThreadGroup) IsInitIn(pidns *PIDNamespace) bool {
   598  	ts := tg.TaskSet()
   599  	ts.mu.RLock()
   600  	defer ts.mu.RUnlock()
   601  	return tg.isInitInLocked(pidns)
   602  }
   603  
   604  // isInitInLocked returns whether this ThreadGroup has TID 1 in the given
   605  // PIDNamespace.
   606  //
   607  // Preconditions: TaskSet.mu must be locked.
   608  func (tg *ThreadGroup) isInitInLocked(pidns *PIDNamespace) bool {
   609  	return pidns.tgids[tg] == initTID
   610  }
   611  
// itimerRealListener implements ktime.Listener for ITIMER_REAL expirations.
//
// +stateify savable
type itimerRealListener struct {
	// tg is the thread group that is signaled on timer expiration.
	tg *ThreadGroup
}
   618  
// NotifyTimer implements ktime.TimerListener.NotifyTimer by sending SIGALRM
// to the thread group. exp and setting are unused.
func (l *itimerRealListener) NotifyTimer(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
	l.tg.SendSignal(SignalInfoPriv(linux.SIGALRM))
	// NOTE(review): returning (Setting{}, false) presumably leaves the
	// timer's setting unchanged — confirm against ktime.TimerListener.
	return ktime.Setting{}, false
}