// gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/kernel/sessions.go (about)

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// SessionID is the public identifier of a Session. It has the same underlying
// type as ThreadID.
type SessionID ThreadID

// ProcessGroupID is the public identifier of a ProcessGroup. It has the same
// underlying type as ThreadID.
type ProcessGroupID ThreadID

// Session contains a leader threadgroup and a list of ProcessGroups.
//
// +stateify savable
type Session struct {
	SessionRefs

	// leader is the originator of the Session.
	//
	// Note that this may no longer be running (and may be reaped), so the
	// ID is cached upon initial creation. The leader is still required
	// however, since its PIDNamespace defines the scope of the Session.
	//
	// The leader is immutable.
	leader *ThreadGroup

	// id is the cached identifier in the leader's namespace.
	//
	// The id is immutable.
	id SessionID

	// foreground is the foreground process group.
	//
	// This is protected by TaskSet.mu.
	foreground *ProcessGroup

	// processGroups is a list of process groups in this Session. This is
	// protected by TaskSet.mu.
	processGroups processGroupList

	// sessionEntry is the embedded list entry for TaskSet.sessions. This is
	// protected by TaskSet.mu.
	sessionEntry
}

// DecRef drops a reference.
63 // 64 // Precondition: callers must hold TaskSet.mu for writing. 65 func (s *Session) DecRef() { 66 s.SessionRefs.DecRef(func() { 67 // Remove translations from the leader. 68 for ns := s.leader.pidns; ns != nil; ns = ns.parent { 69 id := ns.sids[s] 70 delete(ns.sids, s) 71 delete(ns.sessions, id) 72 } 73 74 // Remove from the list of global Sessions. 75 s.leader.pidns.owner.sessions.Remove(s) 76 }) 77 } 78 79 // ProcessGroup contains an originator threadgroup and a parent Session. 80 // 81 // +stateify savable 82 type ProcessGroup struct { 83 refs ProcessGroupRefs 84 85 // originator is the originator of the group. 86 // 87 // See note re: leader in Session. The same applies here. 88 // 89 // The originator is immutable. 90 originator *ThreadGroup 91 92 // id is the cached identifier in the originator's namespace. 93 // 94 // The id is immutable. 95 id ProcessGroupID 96 97 // Session is the parent Session. 98 // 99 // The session is immutable. 100 session *Session 101 102 // ancestors is the number of thread groups in this process group whose 103 // parent is in a different process group in the same session. 104 // 105 // The name is derived from the fact that process groups where 106 // ancestors is zero are considered "orphans". 107 // 108 // ancestors is protected by TaskSet.mu. 109 ancestors uint32 110 111 // processGroupEntry is the embedded entry for Sessions.groups. This is 112 // protected by TaskSet.mu. 113 processGroupEntry 114 } 115 116 // Originator returns the originator of the process group. 117 func (pg *ProcessGroup) Originator() *ThreadGroup { 118 return pg.originator 119 } 120 121 // IsOrphan returns true if this process group is an orphan. 122 func (pg *ProcessGroup) IsOrphan() bool { 123 ts := pg.originator.TaskSet() 124 ts.mu.RLock() 125 defer ts.mu.RUnlock() 126 return pg.ancestors == 0 127 } 128 129 // incRefWithParent grabs a reference. 
130 // 131 // This function is called when this ProcessGroup is being associated with some 132 // new ThreadGroup, tg. parentPG is the ProcessGroup of tg's parent 133 // ThreadGroup. If tg is init, then parentPG may be nil. 134 // 135 // Precondition: callers must hold TaskSet.mu for writing. 136 func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) { 137 // We acquire an "ancestor" reference in the case of a nil parent. 138 // This is because the process being associated is init, and init can 139 // never be orphaned (we count it as always having an ancestor). 140 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 141 pg.ancestors++ 142 } 143 144 pg.refs.IncRef() 145 } 146 147 // decRefWithParent drops a reference. 148 // 149 // parentPG is per incRefWithParent. 150 // 151 // Precondition: callers must hold TaskSet.mu for writing. 152 func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { 153 // See incRefWithParent regarding parent == nil. 154 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 155 pg.ancestors-- 156 } 157 158 alive := true 159 pg.refs.DecRef(func() { 160 alive = false // don't bother with handleOrphan. 161 162 // Remove translations from the originator. 163 for ns := pg.originator.pidns; ns != nil; ns = ns.parent { 164 id := ns.pgids[pg] 165 delete(ns.pgids, pg) 166 delete(ns.processGroups, id) 167 } 168 169 // Remove the list of process groups. 170 pg.session.processGroups.Remove(pg) 171 pg.session.DecRef() 172 }) 173 if alive { 174 pg.handleOrphan() 175 } 176 } 177 178 // parentPG returns the parent process group. 179 // 180 // Precondition: callers must hold TaskSet.mu. 181 func (tg *ThreadGroup) parentPG() *ProcessGroup { 182 if tg.leader.parent != nil { 183 return tg.leader.parent.tg.processGroup 184 } 185 return nil 186 } 187 188 // handleOrphan checks whether the process group is an orphan and has any 189 // stopped jobs. 
If yes, then appropriate signals are delivered to each thread 190 // group within the process group. 191 // 192 // Precondition: callers must hold TaskSet.mu for writing. 193 func (pg *ProcessGroup) handleOrphan() { 194 // Check if this process is an orphan. 195 if pg.ancestors != 0 { 196 return 197 } 198 199 // See if there are any stopped jobs. 200 hasStopped := false 201 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 202 if tg.processGroup != pg { 203 return 204 } 205 tg.signalHandlers.mu.NestedLock(signalHandlersLockTg) 206 if tg.groupStopComplete { 207 hasStopped = true 208 } 209 tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg) 210 }) 211 if !hasStopped { 212 return 213 } 214 215 // Deliver appropriate signals to all thread groups. 216 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 217 if tg.processGroup != pg { 218 return 219 } 220 tg.signalHandlers.mu.NestedLock(signalHandlersLockTg) 221 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */) 222 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */) 223 tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg) 224 }) 225 226 return 227 } 228 229 // Session returns the process group's session without taking a reference. 230 func (pg *ProcessGroup) Session() *Session { 231 return pg.session 232 } 233 234 // SendSignal sends a signal to all processes inside the process group. It is 235 // analogous to kernel/signal.c:kill_pgrp. 
236 func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error { 237 tasks := pg.originator.TaskSet() 238 tasks.mu.RLock() 239 defer tasks.mu.RUnlock() 240 241 var lastErr error 242 for tg := range tasks.Root.tgids { 243 if tg.processGroup == pg { 244 tg.signalHandlers.mu.Lock() 245 infoCopy := *info 246 if err := tg.leader.sendSignalLocked(&infoCopy, true /*group*/); err != nil { 247 lastErr = err 248 } 249 tg.signalHandlers.mu.Unlock() 250 } 251 } 252 return lastErr 253 } 254 255 // CreateSession creates a new Session, with the ThreadGroup as the leader. 256 // 257 // EPERM may be returned if either the given ThreadGroup is already a Session 258 // leader, or a ProcessGroup already exists for the ThreadGroup's ID. 259 func (tg *ThreadGroup) CreateSession() (SessionID, error) { 260 tg.pidns.owner.mu.Lock() 261 defer tg.pidns.owner.mu.Unlock() 262 tg.signalHandlers.mu.Lock() 263 defer tg.signalHandlers.mu.Unlock() 264 return tg.createSession() 265 } 266 267 // createSession creates a new session for a threadgroup. 268 // 269 // Precondition: callers must hold TaskSet.mu and the signal mutex for writing. 270 func (tg *ThreadGroup) createSession() (SessionID, error) { 271 // Get the ID for this thread in the current namespace. 272 id := tg.pidns.tgids[tg] 273 274 // Check if this ThreadGroup already leads a Session, or 275 // if the proposed group is already taken. 276 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 277 if s.leader.pidns != tg.pidns { 278 continue 279 } 280 if s.leader == tg { 281 return -1, linuxerr.EPERM 282 } 283 if s.id == SessionID(id) { 284 return -1, linuxerr.EPERM 285 } 286 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 287 if pg.id == ProcessGroupID(id) { 288 return -1, linuxerr.EPERM 289 } 290 } 291 } 292 293 // Create a new Session, with a single reference. 
294 sid := SessionID(id) 295 s := &Session{ 296 id: sid, 297 leader: tg, 298 } 299 s.InitRefs() 300 301 // Create a new ProcessGroup, belonging to that Session. 302 // This also has a single reference (assigned below). 303 // 304 // Note that since this is a new session and a new process group, there 305 // will be zero ancestors for this process group. (It is an orphan at 306 // this point.) 307 pg := &ProcessGroup{ 308 id: ProcessGroupID(id), 309 originator: tg, 310 session: s, 311 ancestors: 0, 312 } 313 pg.refs.InitRefs() 314 315 // Tie them and return the result. 316 s.processGroups.PushBack(pg) 317 tg.pidns.owner.sessions.PushBack(s) 318 319 // Leave the current group, and assign the new one. 320 if tg.processGroup != nil { 321 oldParentPG := tg.parentPG() 322 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 323 childTG.processGroup.incRefWithParent(pg) 324 childTG.processGroup.decRefWithParent(oldParentPG) 325 }) 326 // If tg.processGroup is an orphan, decRefWithParent will lock 327 // the signal mutex of each thread group in tg.processGroup. 328 // However, tg's signal mutex may already be locked at this 329 // point. We change tg's process group before calling 330 // decRefWithParent to avoid locking tg's signal mutex twice. 331 oldPG := tg.processGroup 332 tg.processGroup = pg 333 oldPG.decRefWithParent(oldParentPG) 334 } else { 335 // The current process group may be nil only in the case of an 336 // unparented thread group (i.e. the init process). This would 337 // not normally occur, but we allow it for the convenience of 338 // CreateSession working from that point. There will be no 339 // child processes. We always say that the very first group 340 // created has ancestors (avoids checks elsewhere). 341 // 342 // Note that this mirrors the parent == nil logic in 343 // incRef/decRef/reparent, which counts nil as an ancestor. 
344 tg.processGroup = pg 345 tg.processGroup.ancestors++ 346 } 347 348 // Ensure a translation is added to all namespaces. 349 for ns := tg.pidns; ns != nil; ns = ns.parent { 350 local := ns.tgids[tg] 351 ns.sids[s] = SessionID(local) 352 ns.sessions[SessionID(local)] = s 353 ns.pgids[pg] = ProcessGroupID(local) 354 ns.processGroups[ProcessGroupID(local)] = pg 355 } 356 357 // Disconnect from the controlling terminal. 358 tg.tty = nil 359 360 return sid, nil 361 } 362 363 // CreateProcessGroup creates a new process group. 364 // 365 // An EPERM error will be returned if the ThreadGroup belongs to a different 366 // Session, is a Session leader or the group already exists. 367 func (tg *ThreadGroup) CreateProcessGroup() error { 368 tg.pidns.owner.mu.Lock() 369 defer tg.pidns.owner.mu.Unlock() 370 371 // Get the ID for this thread in the current namespace. 372 id := tg.pidns.tgids[tg] 373 374 // Check whether a process still exists or not. 375 if id == 0 { 376 return linuxerr.ESRCH 377 } 378 379 // Per above, check for a Session leader or existing group. 380 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 381 if s.leader.pidns != tg.pidns { 382 continue 383 } 384 if s.leader == tg { 385 return linuxerr.EPERM 386 } 387 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 388 if pg.id == ProcessGroupID(id) { 389 return linuxerr.EPERM 390 } 391 } 392 } 393 394 // Create a new ProcessGroup, belonging to the current Session. 395 // 396 // We manually adjust the ancestors if the parent is in the same 397 // session. 398 tg.processGroup.session.IncRef() 399 pg := ProcessGroup{ 400 id: ProcessGroupID(id), 401 originator: tg, 402 session: tg.processGroup.session, 403 } 404 pg.refs.InitRefs() 405 406 if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session { 407 pg.ancestors++ 408 } 409 410 // Assign the new process group; adjust children. 
411 oldParentPG := tg.parentPG() 412 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 413 childTG.processGroup.incRefWithParent(&pg) 414 childTG.processGroup.decRefWithParent(oldParentPG) 415 }) 416 tg.processGroup.decRefWithParent(oldParentPG) 417 tg.processGroup = &pg 418 419 // Add the new process group to the session. 420 pg.session.processGroups.PushBack(&pg) 421 422 // Ensure this translation is added to all namespaces. 423 for ns := tg.pidns; ns != nil; ns = ns.parent { 424 local := ns.tgids[tg] 425 ns.pgids[&pg] = ProcessGroupID(local) 426 ns.processGroups[ProcessGroupID(local)] = &pg 427 } 428 429 return nil 430 } 431 432 // JoinProcessGroup joins an existing process group. 433 // 434 // This function will return EACCES if an exec has been performed since fork 435 // by the given ThreadGroup, and EPERM if the Sessions are not the same or the 436 // group does not exist. 437 // 438 // If checkExec is set, then the join is not permitted after the process has 439 // executed exec at least once. 440 func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error { 441 pidns.owner.mu.Lock() 442 defer pidns.owner.mu.Unlock() 443 444 // Check whether the process still exists or not. 445 if _, ok := pidns.tgids[tg]; !ok { 446 return linuxerr.ESRCH 447 } 448 449 // Lookup the ProcessGroup. 450 pg := pidns.processGroups[pgid] 451 if pg == nil { 452 return linuxerr.EPERM 453 } 454 455 // Disallow the join if an execve has performed, per POSIX. 456 if checkExec && tg.execed { 457 return linuxerr.EACCES 458 } 459 460 // See if it's in the same session as ours. 461 if pg.session != tg.processGroup.session { 462 return linuxerr.EPERM 463 } 464 465 // Join the group; adjust children. 
466 parentPG := tg.parentPG() 467 pg.incRefWithParent(parentPG) 468 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 469 childTG.processGroup.incRefWithParent(pg) 470 childTG.processGroup.decRefWithParent(tg.processGroup) 471 }) 472 tg.processGroup.decRefWithParent(parentPG) 473 tg.processGroup = pg 474 475 return nil 476 } 477 478 // Session returns the ThreadGroup's Session. 479 // 480 // A reference is not taken on the session. 481 func (tg *ThreadGroup) Session() *Session { 482 tg.pidns.owner.mu.RLock() 483 defer tg.pidns.owner.mu.RUnlock() 484 return tg.processGroup.session 485 } 486 487 // IDOfSession returns the Session assigned to s in PID namespace ns. 488 // 489 // If this group isn't visible in this namespace, zero will be returned. It is 490 // the callers responsibility to check that before using this function. 491 func (ns *PIDNamespace) IDOfSession(s *Session) SessionID { 492 ns.owner.mu.RLock() 493 defer ns.owner.mu.RUnlock() 494 return ns.sids[s] 495 } 496 497 // SessionWithID returns the Session with the given ID in the PID namespace ns, 498 // or nil if that given ID is not defined in this namespace. 499 // 500 // A reference is not taken on the session. 501 func (ns *PIDNamespace) SessionWithID(id SessionID) *Session { 502 ns.owner.mu.RLock() 503 defer ns.owner.mu.RUnlock() 504 return ns.sessions[id] 505 } 506 507 // ProcessGroup returns the ThreadGroup's ProcessGroup. 508 // 509 // A reference is not taken on the process group. 510 func (tg *ThreadGroup) ProcessGroup() *ProcessGroup { 511 tg.pidns.owner.mu.RLock() 512 defer tg.pidns.owner.mu.RUnlock() 513 return tg.processGroup 514 } 515 516 // IDOfProcessGroup returns the process group assigned to pg in PID namespace ns. 517 // 518 // The same constraints apply as IDOfSession. 
519 func (ns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID { 520 ns.owner.mu.RLock() 521 defer ns.owner.mu.RUnlock() 522 return ns.pgids[pg] 523 } 524 525 // ProcessGroupWithID returns the ProcessGroup with the given ID in the PID 526 // namespace ns, or nil if that given ID is not defined in this namespace. 527 // 528 // A reference is not taken on the process group. 529 func (ns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup { 530 ns.owner.mu.RLock() 531 defer ns.owner.mu.RUnlock() 532 return ns.processGroups[id] 533 }