github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/kernel/sessions.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"github.com/ttpreport/gvisor-ligolo/pkg/abi/linux"
    19  	"github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr"
    20  )
    21  
// SessionID is the public identifier of a Session. It is a distinct type
// built on ThreadID; a Session's ID is taken from its leader's thread group ID
// when the Session is created.
type SessionID ThreadID
    24  
// ProcessGroupID is the public identifier of a ProcessGroup. It is a distinct
// type built on ThreadID; a ProcessGroup's ID is taken from its originator's
// thread group ID when the ProcessGroup is created.
type ProcessGroupID ThreadID
    27  
// Session contains a leader threadgroup and a list of ProcessGroups.
//
// A Session is reference counted through the embedded SessionRefs. When the
// last reference is dropped via DecRef, the Session's ID translations are
// removed from the leader's PID namespace and its ancestors, and the Session
// is unlinked from the TaskSet's list of sessions.
//
// +stateify savable
type Session struct {
	SessionRefs

	// leader is the originator of the Session.
	//
	// Note that this may no longer be running (and may be reaped), so the
	// ID is cached upon initial creation. The leader is still required
	// however, since its PIDNamespace defines the scope of the Session.
	//
	// The leader is immutable.
	leader *ThreadGroup

	// id is the cached identifier in the leader's namespace.
	//
	// The id is immutable.
	id SessionID

	// foreground is the foreground process group.
	//
	// This is protected by TaskSet.mu.
	foreground *ProcessGroup

	// processGroups is the list of process groups in this Session. This is
	// protected by TaskSet.mu.
	processGroups processGroupList

	// sessionEntry is the embedded list entry that links this Session into
	// TaskSet.sessions. This is protected by TaskSet.mu.
	sessionEntry
}
    61  
    62  // DecRef drops a reference.
    63  //
    64  // Precondition: callers must hold TaskSet.mu for writing.
    65  func (s *Session) DecRef() {
    66  	s.SessionRefs.DecRef(func() {
    67  		// Remove translations from the leader.
    68  		for ns := s.leader.pidns; ns != nil; ns = ns.parent {
    69  			id := ns.sids[s]
    70  			delete(ns.sids, s)
    71  			delete(ns.sessions, id)
    72  		}
    73  
    74  		// Remove from the list of global Sessions.
    75  		s.leader.pidns.owner.sessions.Remove(s)
    76  	})
    77  }
    78  
// ProcessGroup contains an originator threadgroup and a parent Session.
//
// A ProcessGroup is reference counted through refs; references are taken and
// dropped with incRefWithParent and decRefWithParent, which also maintain the
// ancestors count used for orphan detection.
//
// +stateify savable
type ProcessGroup struct {
	// refs is the reference count for this process group.
	refs ProcessGroupRefs

	// originator is the originator of the group.
	//
	// See note re: leader in Session. The same applies here.
	//
	// The originator is immutable.
	originator *ThreadGroup

	// id is the cached identifier in the originator's namespace.
	//
	// The id is immutable.
	id ProcessGroupID

	// session is the parent Session.
	//
	// The session is immutable.
	session *Session

	// ancestors is the number of thread groups in this process group whose
	// parent is in a different process group in the same session.
	//
	// The name is derived from the fact that process groups where
	// ancestors is zero are considered "orphans".
	//
	// ancestors is protected by TaskSet.mu.
	ancestors uint32

	// processGroupEntry is the embedded list entry that links this
	// ProcessGroup into Session.processGroups. This is protected by
	// TaskSet.mu.
	processGroupEntry
}
   115  
   116  // Originator retuns the originator of the process group.
   117  func (pg *ProcessGroup) Originator() *ThreadGroup {
   118  	return pg.originator
   119  }
   120  
   121  // IsOrphan returns true if this process group is an orphan.
   122  func (pg *ProcessGroup) IsOrphan() bool {
   123  	ts := pg.originator.TaskSet()
   124  	ts.mu.RLock()
   125  	defer ts.mu.RUnlock()
   126  	return pg.ancestors == 0
   127  }
   128  
   129  // incRefWithParent grabs a reference.
   130  //
   131  // This function is called when this ProcessGroup is being associated with some
   132  // new ThreadGroup, tg. parentPG is the ProcessGroup of tg's parent
   133  // ThreadGroup. If tg is init, then parentPG may be nil.
   134  //
   135  // Precondition: callers must hold TaskSet.mu for writing.
   136  func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) {
   137  	// We acquire an "ancestor" reference in the case of a nil parent.
   138  	// This is because the process being associated is init, and init can
   139  	// never be orphaned (we count it as always having an ancestor).
   140  	if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) {
   141  		pg.ancestors++
   142  	}
   143  
   144  	pg.refs.IncRef()
   145  }
   146  
   147  // decRefWithParent drops a reference.
   148  //
   149  // parentPG is per incRefWithParent.
   150  //
   151  // Precondition: callers must hold TaskSet.mu for writing.
   152  func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
   153  	// See incRefWithParent regarding parent == nil.
   154  	if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) {
   155  		pg.ancestors--
   156  	}
   157  
   158  	alive := true
   159  	pg.refs.DecRef(func() {
   160  		alive = false // don't bother with handleOrphan.
   161  
   162  		// Remove translations from the originator.
   163  		for ns := pg.originator.pidns; ns != nil; ns = ns.parent {
   164  			id := ns.pgids[pg]
   165  			delete(ns.pgids, pg)
   166  			delete(ns.processGroups, id)
   167  		}
   168  
   169  		// Remove the list of process groups.
   170  		pg.session.processGroups.Remove(pg)
   171  		pg.session.DecRef()
   172  	})
   173  	if alive {
   174  		pg.handleOrphan()
   175  	}
   176  }
   177  
   178  // parentPG returns the parent process group.
   179  //
   180  // Precondition: callers must hold TaskSet.mu.
   181  func (tg *ThreadGroup) parentPG() *ProcessGroup {
   182  	if tg.leader.parent != nil {
   183  		return tg.leader.parent.tg.processGroup
   184  	}
   185  	return nil
   186  }
   187  
   188  // handleOrphan checks whether the process group is an orphan and has any
   189  // stopped jobs. If yes, then appropriate signals are delivered to each thread
   190  // group within the process group.
   191  //
   192  // Precondition: callers must hold TaskSet.mu for writing.
   193  func (pg *ProcessGroup) handleOrphan() {
   194  	// Check if this process is an orphan.
   195  	if pg.ancestors != 0 {
   196  		return
   197  	}
   198  
   199  	// See if there are any stopped jobs.
   200  	hasStopped := false
   201  	pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) {
   202  		if tg.processGroup != pg {
   203  			return
   204  		}
   205  		tg.signalHandlers.mu.NestedLock(signalHandlersLockTg)
   206  		if tg.groupStopComplete {
   207  			hasStopped = true
   208  		}
   209  		tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg)
   210  	})
   211  	if !hasStopped {
   212  		return
   213  	}
   214  
   215  	// Deliver appropriate signals to all thread groups.
   216  	pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) {
   217  		if tg.processGroup != pg {
   218  			return
   219  		}
   220  		tg.signalHandlers.mu.NestedLock(signalHandlersLockTg)
   221  		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */)
   222  		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */)
   223  		tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg)
   224  	})
   225  
   226  	return
   227  }
   228  
   229  // Session returns the process group's session without taking a reference.
   230  func (pg *ProcessGroup) Session() *Session {
   231  	return pg.session
   232  }
   233  
   234  // SendSignal sends a signal to all processes inside the process group. It is
   235  // analogous to kernel/signal.c:kill_pgrp.
   236  func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error {
   237  	tasks := pg.originator.TaskSet()
   238  	tasks.mu.RLock()
   239  	defer tasks.mu.RUnlock()
   240  
   241  	var lastErr error
   242  	for tg := range tasks.Root.tgids {
   243  		if tg.processGroup == pg {
   244  			tg.signalHandlers.mu.Lock()
   245  			infoCopy := *info
   246  			if err := tg.leader.sendSignalLocked(&infoCopy, true /*group*/); err != nil {
   247  				lastErr = err
   248  			}
   249  			tg.signalHandlers.mu.Unlock()
   250  		}
   251  	}
   252  	return lastErr
   253  }
   254  
   255  // CreateSession creates a new Session, with the ThreadGroup as the leader.
   256  //
   257  // EPERM may be returned if either the given ThreadGroup is already a Session
   258  // leader, or a ProcessGroup already exists for the ThreadGroup's ID.
   259  func (tg *ThreadGroup) CreateSession() error {
   260  	tg.pidns.owner.mu.Lock()
   261  	defer tg.pidns.owner.mu.Unlock()
   262  	tg.signalHandlers.mu.Lock()
   263  	defer tg.signalHandlers.mu.Unlock()
   264  	return tg.createSession()
   265  }
   266  
   267  // createSession creates a new session for a threadgroup.
   268  //
   269  // Precondition: callers must hold TaskSet.mu and the signal mutex for writing.
   270  func (tg *ThreadGroup) createSession() error {
   271  	// Get the ID for this thread in the current namespace.
   272  	id := tg.pidns.tgids[tg]
   273  
   274  	// Check if this ThreadGroup already leads a Session, or
   275  	// if the proposed group is already taken.
   276  	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
   277  		if s.leader.pidns != tg.pidns {
   278  			continue
   279  		}
   280  		if s.leader == tg {
   281  			return linuxerr.EPERM
   282  		}
   283  		if s.id == SessionID(id) {
   284  			return linuxerr.EPERM
   285  		}
   286  		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
   287  			if pg.id == ProcessGroupID(id) {
   288  				return linuxerr.EPERM
   289  			}
   290  		}
   291  	}
   292  
   293  	// Create a new Session, with a single reference.
   294  	s := &Session{
   295  		id:     SessionID(id),
   296  		leader: tg,
   297  	}
   298  	s.InitRefs()
   299  
   300  	// Create a new ProcessGroup, belonging to that Session.
   301  	// This also has a single reference (assigned below).
   302  	//
   303  	// Note that since this is a new session and a new process group, there
   304  	// will be zero ancestors for this process group. (It is an orphan at
   305  	// this point.)
   306  	pg := &ProcessGroup{
   307  		id:         ProcessGroupID(id),
   308  		originator: tg,
   309  		session:    s,
   310  		ancestors:  0,
   311  	}
   312  	pg.refs.InitRefs()
   313  
   314  	// Tie them and return the result.
   315  	s.processGroups.PushBack(pg)
   316  	tg.pidns.owner.sessions.PushBack(s)
   317  
   318  	// Leave the current group, and assign the new one.
   319  	if tg.processGroup != nil {
   320  		oldParentPG := tg.parentPG()
   321  		tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   322  			childTG.processGroup.incRefWithParent(pg)
   323  			childTG.processGroup.decRefWithParent(oldParentPG)
   324  		})
   325  		// If tg.processGroup is an orphan, decRefWithParent will lock
   326  		// the signal mutex of each thread group in tg.processGroup.
   327  		// However, tg's signal mutex may already be locked at this
   328  		// point. We change tg's process group before calling
   329  		// decRefWithParent to avoid locking tg's signal mutex twice.
   330  		oldPG := tg.processGroup
   331  		tg.processGroup = pg
   332  		oldPG.decRefWithParent(oldParentPG)
   333  	} else {
   334  		// The current process group may be nil only in the case of an
   335  		// unparented thread group (i.e. the init process). This would
   336  		// not normally occur, but we allow it for the convenience of
   337  		// CreateSession working from that point. There will be no
   338  		// child processes. We always say that the very first group
   339  		// created has ancestors (avoids checks elsewhere).
   340  		//
   341  		// Note that this mirrors the parent == nil logic in
   342  		// incRef/decRef/reparent, which counts nil as an ancestor.
   343  		tg.processGroup = pg
   344  		tg.processGroup.ancestors++
   345  	}
   346  
   347  	// Ensure a translation is added to all namespaces.
   348  	for ns := tg.pidns; ns != nil; ns = ns.parent {
   349  		local := ns.tgids[tg]
   350  		ns.sids[s] = SessionID(local)
   351  		ns.sessions[SessionID(local)] = s
   352  		ns.pgids[pg] = ProcessGroupID(local)
   353  		ns.processGroups[ProcessGroupID(local)] = pg
   354  	}
   355  
   356  	// Disconnect from the controlling terminal.
   357  	tg.tty = nil
   358  
   359  	return nil
   360  }
   361  
   362  // CreateProcessGroup creates a new process group.
   363  //
   364  // An EPERM error will be returned if the ThreadGroup belongs to a different
   365  // Session, is a Session leader or the group already exists.
   366  func (tg *ThreadGroup) CreateProcessGroup() error {
   367  	tg.pidns.owner.mu.Lock()
   368  	defer tg.pidns.owner.mu.Unlock()
   369  
   370  	// Get the ID for this thread in the current namespace.
   371  	id := tg.pidns.tgids[tg]
   372  
   373  	// Check whether a process still exists or not.
   374  	if id == 0 {
   375  		return linuxerr.ESRCH
   376  	}
   377  
   378  	// Per above, check for a Session leader or existing group.
   379  	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
   380  		if s.leader.pidns != tg.pidns {
   381  			continue
   382  		}
   383  		if s.leader == tg {
   384  			return linuxerr.EPERM
   385  		}
   386  		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
   387  			if pg.id == ProcessGroupID(id) {
   388  				return linuxerr.EPERM
   389  			}
   390  		}
   391  	}
   392  
   393  	// Create a new ProcessGroup, belonging to the current Session.
   394  	//
   395  	// We manually adjust the ancestors if the parent is in the same
   396  	// session.
   397  	tg.processGroup.session.IncRef()
   398  	pg := ProcessGroup{
   399  		id:         ProcessGroupID(id),
   400  		originator: tg,
   401  		session:    tg.processGroup.session,
   402  	}
   403  	pg.refs.InitRefs()
   404  
   405  	if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
   406  		pg.ancestors++
   407  	}
   408  
   409  	// Assign the new process group; adjust children.
   410  	oldParentPG := tg.parentPG()
   411  	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   412  		childTG.processGroup.incRefWithParent(&pg)
   413  		childTG.processGroup.decRefWithParent(oldParentPG)
   414  	})
   415  	tg.processGroup.decRefWithParent(oldParentPG)
   416  	tg.processGroup = &pg
   417  
   418  	// Add the new process group to the session.
   419  	pg.session.processGroups.PushBack(&pg)
   420  
   421  	// Ensure this translation is added to all namespaces.
   422  	for ns := tg.pidns; ns != nil; ns = ns.parent {
   423  		local := ns.tgids[tg]
   424  		ns.pgids[&pg] = ProcessGroupID(local)
   425  		ns.processGroups[ProcessGroupID(local)] = &pg
   426  	}
   427  
   428  	return nil
   429  }
   430  
   431  // JoinProcessGroup joins an existing process group.
   432  //
   433  // This function will return EACCES if an exec has been performed since fork
   434  // by the given ThreadGroup, and EPERM if the Sessions are not the same or the
   435  // group does not exist.
   436  //
   437  // If checkExec is set, then the join is not permitted after the process has
   438  // executed exec at least once.
   439  func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error {
   440  	pidns.owner.mu.Lock()
   441  	defer pidns.owner.mu.Unlock()
   442  
   443  	// Check whether the process still exists or not.
   444  	if _, ok := pidns.tgids[tg]; !ok {
   445  		return linuxerr.ESRCH
   446  	}
   447  
   448  	// Lookup the ProcessGroup.
   449  	pg := pidns.processGroups[pgid]
   450  	if pg == nil {
   451  		return linuxerr.EPERM
   452  	}
   453  
   454  	// Disallow the join if an execve has performed, per POSIX.
   455  	if checkExec && tg.execed {
   456  		return linuxerr.EACCES
   457  	}
   458  
   459  	// See if it's in the same session as ours.
   460  	if pg.session != tg.processGroup.session {
   461  		return linuxerr.EPERM
   462  	}
   463  
   464  	// Join the group; adjust children.
   465  	parentPG := tg.parentPG()
   466  	pg.incRefWithParent(parentPG)
   467  	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
   468  		childTG.processGroup.incRefWithParent(pg)
   469  		childTG.processGroup.decRefWithParent(tg.processGroup)
   470  	})
   471  	tg.processGroup.decRefWithParent(parentPG)
   472  	tg.processGroup = pg
   473  
   474  	return nil
   475  }
   476  
   477  // Session returns the ThreadGroup's Session.
   478  //
   479  // A reference is not taken on the session.
   480  func (tg *ThreadGroup) Session() *Session {
   481  	tg.pidns.owner.mu.RLock()
   482  	defer tg.pidns.owner.mu.RUnlock()
   483  	return tg.processGroup.session
   484  }
   485  
   486  // IDOfSession returns the Session assigned to s in PID namespace ns.
   487  //
   488  // If this group isn't visible in this namespace, zero will be returned. It is
   489  // the callers responsibility to check that before using this function.
   490  func (ns *PIDNamespace) IDOfSession(s *Session) SessionID {
   491  	ns.owner.mu.RLock()
   492  	defer ns.owner.mu.RUnlock()
   493  	return ns.sids[s]
   494  }
   495  
   496  // SessionWithID returns the Session with the given ID in the PID namespace ns,
   497  // or nil if that given ID is not defined in this namespace.
   498  //
   499  // A reference is not taken on the session.
   500  func (ns *PIDNamespace) SessionWithID(id SessionID) *Session {
   501  	ns.owner.mu.RLock()
   502  	defer ns.owner.mu.RUnlock()
   503  	return ns.sessions[id]
   504  }
   505  
   506  // ProcessGroup returns the ThreadGroup's ProcessGroup.
   507  //
   508  // A reference is not taken on the process group.
   509  func (tg *ThreadGroup) ProcessGroup() *ProcessGroup {
   510  	tg.pidns.owner.mu.RLock()
   511  	defer tg.pidns.owner.mu.RUnlock()
   512  	return tg.processGroup
   513  }
   514  
   515  // IDOfProcessGroup returns the process group assigned to pg in PID namespace ns.
   516  //
   517  // The same constraints apply as IDOfSession.
   518  func (ns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID {
   519  	ns.owner.mu.RLock()
   520  	defer ns.owner.mu.RUnlock()
   521  	return ns.pgids[pg]
   522  }
   523  
   524  // ProcessGroupWithID returns the ProcessGroup with the given ID in the PID
   525  // namespace ns, or nil if that given ID is not defined in this namespace.
   526  //
   527  // A reference is not taken on the process group.
   528  func (ns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup {
   529  	ns.owner.mu.RLock()
   530  	defer ns.owner.mu.RUnlock()
   531  	return ns.processGroups[id]
   532  }