github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/sessions.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    20  	"github.com/SagerNet/gvisor/pkg/syserror"
    21  )
    22  
// SessionID is the public identifier of a Session, as seen from within a
// particular PID namespace.
//
// It is defined in terms of ThreadID: createSession derives a Session's ID in
// each namespace from the ID of the leader ThreadGroup in that namespace.
type SessionID ThreadID
    25  
// ProcessGroupID is the public identifier of a ProcessGroup, as seen from
// within a particular PID namespace.
//
// It is defined in terms of ThreadID: createSession and CreateProcessGroup
// derive a ProcessGroup's ID in each namespace from the ID of the originator
// ThreadGroup in that namespace.
type ProcessGroupID ThreadID
    28  
// Session contains a leader threadgroup and a list of ProcessGroups.
//
// Sessions are reference counted through the embedded SessionRefs. When the
// last reference is dropped (see Session.DecRef), the Session is removed from
// the PID namespace translation tables and from the TaskSet's session list.
//
// +stateify savable
type Session struct {
	// SessionRefs provides the reference count for the Session.
	SessionRefs

	// leader is the originator of the Session.
	//
	// Note that this may no longer be running (and may be reaped), so the
	// ID is cached upon initial creation. The leader is still required
	// however, since its PIDNamespace defines the scope of the Session.
	//
	// The leader is immutable.
	leader *ThreadGroup

	// id is the cached identifier in the leader's namespace.
	//
	// The id is immutable.
	id SessionID

	// foreground is the foreground process group.
	//
	// This is protected by TaskSet.mu.
	foreground *ProcessGroup

	// processGroups is a list of process groups in this Session. This is
	// protected by TaskSet.mu.
	processGroups processGroupList

	// sessionEntry is the embedded list entry for TaskSet.sessions. This is
	// protected by TaskSet.mu.
	sessionEntry
}
    62  
    63  // DecRef drops a reference.
    64  //
    65  // Precondition: callers must hold TaskSet.mu for writing.
    66  func (s *Session) DecRef() {
    67  	s.SessionRefs.DecRef(func() {
    68  		// Remove translations from the leader.
    69  		for ns := s.leader.pidns; ns != nil; ns = ns.parent {
    70  			id := ns.sids[s]
    71  			delete(ns.sids, s)
    72  			delete(ns.sessions, id)
    73  		}
    74  
    75  		// Remove from the list of global Sessions.
    76  		s.leader.pidns.owner.sessions.Remove(s)
    77  	})
    78  }
    79  
// ProcessGroup contains an originator threadgroup and a parent Session.
//
// ProcessGroups are reference counted through refs; references are taken and
// dropped with incRefWithParent and decRefWithParent, which also maintain the
// ancestors count.
//
// +stateify savable
type ProcessGroup struct {
	// refs is the reference count for the ProcessGroup.
	refs ProcessGroupRefs

	// originator is the originator of the group.
	//
	// See note re: leader in Session. The same applies here.
	//
	// The originator is immutable.
	originator *ThreadGroup

	// id is the cached identifier in the originator's namespace.
	//
	// The id is immutable.
	id ProcessGroupID

	// session is the parent Session.
	//
	// The session is immutable.
	session *Session

	// ancestors is the number of thread groups in this process group whose
	// parent is in a different process group in the same session.
	//
	// The name is derived from the fact that process groups where
	// ancestors is zero are considered "orphans".
	//
	// ancestors is protected by TaskSet.mu.
	ancestors uint32

	// processGroupEntry is the embedded list entry for
	// Session.processGroups. This is protected by TaskSet.mu.
	processGroupEntry
}
   116  
// Originator returns the originator of the process group, i.e. the
// ThreadGroup recorded when the group was created.
//
// No lock is taken; the originator field is immutable.
func (pg *ProcessGroup) Originator() *ThreadGroup {
	return pg.originator
}
   121  
   122  // IsOrphan returns true if this process group is an orphan.
   123  func (pg *ProcessGroup) IsOrphan() bool {
   124  	ts := pg.originator.TaskSet()
   125  	ts.mu.RLock()
   126  	defer ts.mu.RUnlock()
   127  	return pg.ancestors == 0
   128  }
   129  
   130  // incRefWithParent grabs a reference.
   131  //
   132  // This function is called when this ProcessGroup is being associated with some
   133  // new ThreadGroup, tg. parentPG is the ProcessGroup of tg's parent
   134  // ThreadGroup. If tg is init, then parentPG may be nil.
   135  //
   136  // Precondition: callers must hold TaskSet.mu for writing.
   137  func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) {
   138  	// We acquire an "ancestor" reference in the case of a nil parent.
   139  	// This is because the process being associated is init, and init can
   140  	// never be orphaned (we count it as always having an ancestor).
   141  	if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) {
   142  		pg.ancestors++
   143  	}
   144  
   145  	pg.refs.IncRef()
   146  }
   147  
   148  // decRefWithParent drops a reference.
   149  //
   150  // parentPG is per incRefWithParent.
   151  //
   152  // Precondition: callers must hold TaskSet.mu for writing.
   153  func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
   154  	// See incRefWithParent regarding parent == nil.
   155  	if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) {
   156  		pg.ancestors--
   157  	}
   158  
   159  	alive := true
   160  	pg.refs.DecRef(func() {
   161  		alive = false // don't bother with handleOrphan.
   162  
   163  		// Remove translations from the originator.
   164  		for ns := pg.originator.pidns; ns != nil; ns = ns.parent {
   165  			id := ns.pgids[pg]
   166  			delete(ns.pgids, pg)
   167  			delete(ns.processGroups, id)
   168  		}
   169  
   170  		// Remove the list of process groups.
   171  		pg.session.processGroups.Remove(pg)
   172  		pg.session.DecRef()
   173  	})
   174  	if alive {
   175  		pg.handleOrphan()
   176  	}
   177  }
   178  
   179  // parentPG returns the parent process group.
   180  //
   181  // Precondition: callers must hold TaskSet.mu.
   182  func (tg *ThreadGroup) parentPG() *ProcessGroup {
   183  	if tg.leader.parent != nil {
   184  		return tg.leader.parent.tg.processGroup
   185  	}
   186  	return nil
   187  }
   188  
   189  // handleOrphan checks whether the process group is an orphan and has any
   190  // stopped jobs. If yes, then appropriate signals are delivered to each thread
   191  // group within the process group.
   192  //
   193  // Precondition: callers must hold TaskSet.mu for writing.
   194  func (pg *ProcessGroup) handleOrphan() {
   195  	// Check if this process is an orphan.
   196  	if pg.ancestors != 0 {
   197  		return
   198  	}
   199  
   200  	// See if there are any stopped jobs.
   201  	hasStopped := false
   202  	pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) {
   203  		if tg.processGroup != pg {
   204  			return
   205  		}
   206  		tg.signalHandlers.mu.Lock()
   207  		if tg.groupStopComplete {
   208  			hasStopped = true
   209  		}
   210  		tg.signalHandlers.mu.Unlock()
   211  	})
   212  	if !hasStopped {
   213  		return
   214  	}
   215  
   216  	// Deliver appropriate signals to all thread groups.
   217  	pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) {
   218  		if tg.processGroup != pg {
   219  			return
   220  		}
   221  		tg.signalHandlers.mu.Lock()
   222  		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */)
   223  		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */)
   224  		tg.signalHandlers.mu.Unlock()
   225  	})
   226  
   227  	return
   228  }
   229  
// Session returns the process group's session without taking a reference.
//
// No lock is taken; the session field is immutable.
func (pg *ProcessGroup) Session() *Session {
	return pg.session
}
   234  
   235  // SendSignal sends a signal to all processes inside the process group. It is
   236  // analagous to kernel/signal.c:kill_pgrp.
   237  func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error {
   238  	tasks := pg.originator.TaskSet()
   239  	tasks.mu.RLock()
   240  	defer tasks.mu.RUnlock()
   241  
   242  	var lastErr error
   243  	for tg := range tasks.Root.tgids {
   244  		if tg.processGroup == pg {
   245  			tg.signalHandlers.mu.Lock()
   246  			infoCopy := *info
   247  			if err := tg.leader.sendSignalLocked(&infoCopy, true /*group*/); err != nil {
   248  				lastErr = err
   249  			}
   250  			tg.signalHandlers.mu.Unlock()
   251  		}
   252  	}
   253  	return lastErr
   254  }
   255  
   256  // CreateSession creates a new Session, with the ThreadGroup as the leader.
   257  //
   258  // EPERM may be returned if either the given ThreadGroup is already a Session
   259  // leader, or a ProcessGroup already exists for the ThreadGroup's ID.
   260  func (tg *ThreadGroup) CreateSession() error {
   261  	tg.pidns.owner.mu.Lock()
   262  	defer tg.pidns.owner.mu.Unlock()
   263  	tg.signalHandlers.mu.Lock()
   264  	defer tg.signalHandlers.mu.Unlock()
   265  	return tg.createSession()
   266  }
   267  
   268  // createSession creates a new session for a threadgroup.
   269  //
   270  // Precondition: callers must hold TaskSet.mu and the signal mutex for writing.
   271  func (tg *ThreadGroup) createSession() error {
   272  	// Get the ID for this thread in the current namespace.
   273  	id := tg.pidns.tgids[tg]
   274  
   275  	// Check if this ThreadGroup already leads a Session, or
   276  	// if the proposed group is already taken.
   277  	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
   278  		if s.leader.pidns != tg.pidns {
   279  			continue
   280  		}
   281  		if s.leader == tg {
   282  			return linuxerr.EPERM
   283  		}
   284  		if s.id == SessionID(id) {
   285  			return linuxerr.EPERM
   286  		}
   287  		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
   288  			if pg.id == ProcessGroupID(id) {
   289  				return linuxerr.EPERM
   290  			}
   291  		}
   292  	}
   293  
   294  	// Create a new Session, with a single reference.
   295  	s := &Session{
   296  		id:     SessionID(id),
   297  		leader: tg,
   298  	}
   299  	s.InitRefs()
   300  
   301  	// Create a new ProcessGroup, belonging to that Session.
   302  	// This also has a single reference (assigned below).
   303  	//
   304  	// Note that since this is a new session and a new process group, there
   305  	// will be zero ancestors for this process group. (It is an orphan at
   306  	// this point.)
   307  	pg := &ProcessGroup{
   308  		id:         ProcessGroupID(id),
   309  		originator: tg,
   310  		session:    s,
   311  		ancestors:  0,
   312  	}
   313  	pg.refs.InitRefs()
   314  
   315  	// Tie them and return the result.
   316  	s.processGroups.PushBack(pg)
   317  	tg.pidns.owner.sessions.PushBack(s)
   318  
   319  	// Leave the current group, and assign the new one.
   320  	if tg.processGroup != nil {
   321  		oldParentPG := tg.parentPG()
   322  		tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   323  			childTG.processGroup.incRefWithParent(pg)
   324  			childTG.processGroup.decRefWithParent(oldParentPG)
   325  		})
   326  		// If tg.processGroup is an orphan, decRefWithParent will lock
   327  		// the signal mutex of each thread group in tg.processGroup.
   328  		// However, tg's signal mutex may already be locked at this
   329  		// point. We change tg's process group before calling
   330  		// decRefWithParent to avoid locking tg's signal mutex twice.
   331  		oldPG := tg.processGroup
   332  		tg.processGroup = pg
   333  		oldPG.decRefWithParent(oldParentPG)
   334  	} else {
   335  		// The current process group may be nil only in the case of an
   336  		// unparented thread group (i.e. the init process). This would
   337  		// not normally occur, but we allow it for the convenience of
   338  		// CreateSession working from that point. There will be no
   339  		// child processes. We always say that the very first group
   340  		// created has ancestors (avoids checks elsewhere).
   341  		//
   342  		// Note that this mirrors the parent == nil logic in
   343  		// incRef/decRef/reparent, which counts nil as an ancestor.
   344  		tg.processGroup = pg
   345  		tg.processGroup.ancestors++
   346  	}
   347  
   348  	// Ensure a translation is added to all namespaces.
   349  	for ns := tg.pidns; ns != nil; ns = ns.parent {
   350  		local := ns.tgids[tg]
   351  		ns.sids[s] = SessionID(local)
   352  		ns.sessions[SessionID(local)] = s
   353  		ns.pgids[pg] = ProcessGroupID(local)
   354  		ns.processGroups[ProcessGroupID(local)] = pg
   355  	}
   356  
   357  	// Disconnect from the controlling terminal.
   358  	tg.tty = nil
   359  
   360  	return nil
   361  }
   362  
   363  // CreateProcessGroup creates a new process group.
   364  //
   365  // An EPERM error will be returned if the ThreadGroup belongs to a different
   366  // Session, is a Session leader or the group already exists.
   367  func (tg *ThreadGroup) CreateProcessGroup() error {
   368  	tg.pidns.owner.mu.Lock()
   369  	defer tg.pidns.owner.mu.Unlock()
   370  
   371  	// Get the ID for this thread in the current namespace.
   372  	id := tg.pidns.tgids[tg]
   373  
   374  	// Check whether a process still exists or not.
   375  	if id == 0 {
   376  		return syserror.ESRCH
   377  	}
   378  
   379  	// Per above, check for a Session leader or existing group.
   380  	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
   381  		if s.leader.pidns != tg.pidns {
   382  			continue
   383  		}
   384  		if s.leader == tg {
   385  			return linuxerr.EPERM
   386  		}
   387  		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
   388  			if pg.id == ProcessGroupID(id) {
   389  				return linuxerr.EPERM
   390  			}
   391  		}
   392  	}
   393  
   394  	// Create a new ProcessGroup, belonging to the current Session.
   395  	//
   396  	// We manually adjust the ancestors if the parent is in the same
   397  	// session.
   398  	tg.processGroup.session.IncRef()
   399  	pg := ProcessGroup{
   400  		id:         ProcessGroupID(id),
   401  		originator: tg,
   402  		session:    tg.processGroup.session,
   403  	}
   404  	pg.refs.InitRefs()
   405  
   406  	if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
   407  		pg.ancestors++
   408  	}
   409  
   410  	// Assign the new process group; adjust children.
   411  	oldParentPG := tg.parentPG()
   412  	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   413  		childTG.processGroup.incRefWithParent(&pg)
   414  		childTG.processGroup.decRefWithParent(oldParentPG)
   415  	})
   416  	tg.processGroup.decRefWithParent(oldParentPG)
   417  	tg.processGroup = &pg
   418  
   419  	// Add the new process group to the session.
   420  	pg.session.processGroups.PushBack(&pg)
   421  
   422  	// Ensure this translation is added to all namespaces.
   423  	for ns := tg.pidns; ns != nil; ns = ns.parent {
   424  		local := ns.tgids[tg]
   425  		ns.pgids[&pg] = ProcessGroupID(local)
   426  		ns.processGroups[ProcessGroupID(local)] = &pg
   427  	}
   428  
   429  	return nil
   430  }
   431  
   432  // JoinProcessGroup joins an existing process group.
   433  //
   434  // This function will return EACCES if an exec has been performed since fork
   435  // by the given ThreadGroup, and EPERM if the Sessions are not the same or the
   436  // group does not exist.
   437  //
   438  // If checkExec is set, then the join is not permitted after the process has
   439  // executed exec at least once.
   440  func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error {
   441  	pidns.owner.mu.Lock()
   442  	defer pidns.owner.mu.Unlock()
   443  
   444  	// Lookup the ProcessGroup.
   445  	pg := pidns.processGroups[pgid]
   446  	if pg == nil {
   447  		return linuxerr.EPERM
   448  	}
   449  
   450  	// Disallow the join if an execve has performed, per POSIX.
   451  	if checkExec && tg.execed {
   452  		return linuxerr.EACCES
   453  	}
   454  
   455  	// See if it's in the same session as ours.
   456  	if pg.session != tg.processGroup.session {
   457  		return linuxerr.EPERM
   458  	}
   459  
   460  	// Join the group; adjust children.
   461  	parentPG := tg.parentPG()
   462  	pg.incRefWithParent(parentPG)
   463  	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   464  		childTG.processGroup.incRefWithParent(pg)
   465  		childTG.processGroup.decRefWithParent(tg.processGroup)
   466  	})
   467  	tg.processGroup.decRefWithParent(parentPG)
   468  	tg.processGroup = pg
   469  
   470  	return nil
   471  }
   472  
   473  // Session returns the ThreadGroup's Session.
   474  //
   475  // A reference is not taken on the session.
   476  func (tg *ThreadGroup) Session() *Session {
   477  	tg.pidns.owner.mu.RLock()
   478  	defer tg.pidns.owner.mu.RUnlock()
   479  	return tg.processGroup.session
   480  }
   481  
   482  // IDOfSession returns the Session assigned to s in PID namespace ns.
   483  //
   484  // If this group isn't visible in this namespace, zero will be returned. It is
   485  // the callers responsibility to check that before using this function.
   486  func (ns *PIDNamespace) IDOfSession(s *Session) SessionID {
   487  	ns.owner.mu.RLock()
   488  	defer ns.owner.mu.RUnlock()
   489  	return ns.sids[s]
   490  }
   491  
   492  // SessionWithID returns the Session with the given ID in the PID namespace ns,
   493  // or nil if that given ID is not defined in this namespace.
   494  //
   495  // A reference is not taken on the session.
   496  func (ns *PIDNamespace) SessionWithID(id SessionID) *Session {
   497  	ns.owner.mu.RLock()
   498  	defer ns.owner.mu.RUnlock()
   499  	return ns.sessions[id]
   500  }
   501  
   502  // ProcessGroup returns the ThreadGroup's ProcessGroup.
   503  //
   504  // A reference is not taken on the process group.
   505  func (tg *ThreadGroup) ProcessGroup() *ProcessGroup {
   506  	tg.pidns.owner.mu.RLock()
   507  	defer tg.pidns.owner.mu.RUnlock()
   508  	return tg.processGroup
   509  }
   510  
   511  // IDOfProcessGroup returns the process group assigned to pg in PID namespace ns.
   512  //
   513  // The same constraints apply as IDOfSession.
   514  func (ns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID {
   515  	ns.owner.mu.RLock()
   516  	defer ns.owner.mu.RUnlock()
   517  	return ns.pgids[pg]
   518  }
   519  
   520  // ProcessGroupWithID returns the ProcessGroup with the given ID in the PID
   521  // namespace ns, or nil if that given ID is not defined in this namespace.
   522  //
   523  // A reference is not taken on the process group.
   524  func (ns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup {
   525  	ns.owner.mu.RLock()
   526  	defer ns.owner.mu.RUnlock()
   527  	return ns.processGroups[id]
   528  }