github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/sessions.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 19 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 20 ) 21 22 // SessionID is the public identifier. 23 type SessionID ThreadID 24 25 // ProcessGroupID is the public identifier. 26 type ProcessGroupID ThreadID 27 28 // Session contains a leader threadgroup and a list of ProcessGroups. 29 // 30 // +stateify savable 31 type Session struct { 32 SessionRefs 33 34 // leader is the originator of the Session. 35 // 36 // Note that this may no longer be running (and may be reaped), so the 37 // ID is cached upon initial creation. The leader is still required 38 // however, since its PIDNamespace defines the scope of the Session. 39 // 40 // The leader is immutable. 41 leader *ThreadGroup 42 43 // id is the cached identifier in the leader's namespace. 44 // 45 // The id is immutable. 46 id SessionID 47 48 // foreground is the foreground process group. 49 // 50 // This is protected by TaskSet.mu. 51 foreground *ProcessGroup 52 53 // ProcessGroups is a list of process groups in this Session. This is 54 // protected by TaskSet.mu. 55 processGroups processGroupList 56 57 // sessionEntry is the embed for TaskSet.sessions. This is protected by 58 // TaskSet.mu. 59 sessionEntry 60 } 61 62 // DecRef drops a reference. 63 // 64 // Precondition: callers must hold TaskSet.mu for writing. 65 func (s *Session) DecRef() { 66 s.SessionRefs.DecRef(func() { 67 // Remove translations from the leader. 68 for ns := s.leader.pidns; ns != nil; ns = ns.parent { 69 id := ns.sids[s] 70 delete(ns.sids, s) 71 delete(ns.sessions, id) 72 } 73 74 // Remove from the list of global Sessions. 75 s.leader.pidns.owner.sessions.Remove(s) 76 }) 77 } 78 79 // ProcessGroup contains an originator threadgroup and a parent Session. 80 // 81 // +stateify savable 82 type ProcessGroup struct { 83 refs ProcessGroupRefs 84 85 // originator is the originator of the group. 86 // 87 // See note re: leader in Session. The same applies here. 88 // 89 // The originator is immutable. 90 originator *ThreadGroup 91 92 // id is the cached identifier in the originator's namespace. 93 // 94 // The id is immutable. 95 id ProcessGroupID 96 97 // Session is the parent Session. 98 // 99 // The session is immutable. 100 session *Session 101 102 // ancestors is the number of thread groups in this process group whose 103 // parent is in a different process group in the same session. 104 // 105 // The name is derived from the fact that process groups where 106 // ancestors is zero are considered "orphans". 107 // 108 // ancestors is protected by TaskSet.mu. 109 ancestors uint32 110 111 // processGroupEntry is the embedded entry for Sessions.groups. This is 112 // protected by TaskSet.mu. 113 processGroupEntry 114 } 115 116 // Originator retuns the originator of the process group. 117 func (pg *ProcessGroup) Originator() *ThreadGroup { 118 return pg.originator 119 } 120 121 // IsOrphan returns true if this process group is an orphan. 122 func (pg *ProcessGroup) IsOrphan() bool { 123 ts := pg.originator.TaskSet() 124 ts.mu.RLock() 125 defer ts.mu.RUnlock() 126 return pg.ancestors == 0 127 } 128 129 // incRefWithParent grabs a reference. 130 // 131 // This function is called when this ProcessGroup is being associated with some 132 // new ThreadGroup, tg. parentPG is the ProcessGroup of tg's parent 133 // ThreadGroup. If tg is init, then parentPG may be nil. 134 // 135 // Precondition: callers must hold TaskSet.mu for writing. 136 func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) { 137 // We acquire an "ancestor" reference in the case of a nil parent. 138 // This is because the process being associated is init, and init can 139 // never be orphaned (we count it as always having an ancestor). 140 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 141 pg.ancestors++ 142 } 143 144 pg.refs.IncRef() 145 } 146 147 // decRefWithParent drops a reference. 148 // 149 // parentPG is per incRefWithParent. 150 // 151 // Precondition: callers must hold TaskSet.mu for writing. 152 func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { 153 // See incRefWithParent regarding parent == nil. 154 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 155 pg.ancestors-- 156 } 157 158 alive := true 159 pg.refs.DecRef(func() { 160 alive = false // don't bother with handleOrphan. 161 162 // Remove translations from the originator. 163 for ns := pg.originator.pidns; ns != nil; ns = ns.parent { 164 id := ns.pgids[pg] 165 delete(ns.pgids, pg) 166 delete(ns.processGroups, id) 167 } 168 169 // Remove the list of process groups. 170 pg.session.processGroups.Remove(pg) 171 pg.session.DecRef() 172 }) 173 if alive { 174 pg.handleOrphan() 175 } 176 } 177 178 // parentPG returns the parent process group. 179 // 180 // Precondition: callers must hold TaskSet.mu. 181 func (tg *ThreadGroup) parentPG() *ProcessGroup { 182 if tg.leader.parent != nil { 183 return tg.leader.parent.tg.processGroup 184 } 185 return nil 186 } 187 188 // handleOrphan checks whether the process group is an orphan and has any 189 // stopped jobs. If yes, then appropriate signals are delivered to each thread 190 // group within the process group. 191 // 192 // Precondition: callers must hold TaskSet.mu for writing. 193 func (pg *ProcessGroup) handleOrphan() { 194 // Check if this process is an orphan. 195 if pg.ancestors != 0 { 196 return 197 } 198 199 // See if there are any stopped jobs. 200 hasStopped := false 201 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 202 if tg.processGroup != pg { 203 return 204 } 205 tg.signalHandlers.mu.NestedLock(signalHandlersLockTg) 206 if tg.groupStopComplete { 207 hasStopped = true 208 } 209 tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg) 210 }) 211 if !hasStopped { 212 return 213 } 214 215 // Deliver appropriate signals to all thread groups. 216 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 217 if tg.processGroup != pg { 218 return 219 } 220 tg.signalHandlers.mu.NestedLock(signalHandlersLockTg) 221 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */) 222 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */) 223 tg.signalHandlers.mu.NestedUnlock(signalHandlersLockTg) 224 }) 225 226 return 227 } 228 229 // Session returns the process group's session without taking a reference. 230 func (pg *ProcessGroup) Session() *Session { 231 return pg.session 232 } 233 234 // SendSignal sends a signal to all processes inside the process group. It is 235 // analogous to kernel/signal.c:kill_pgrp. 236 func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error { 237 tasks := pg.originator.TaskSet() 238 tasks.mu.RLock() 239 defer tasks.mu.RUnlock() 240 241 var lastErr error 242 for tg := range tasks.Root.tgids { 243 if tg.processGroup == pg { 244 tg.signalHandlers.mu.Lock() 245 infoCopy := *info 246 if err := tg.leader.sendSignalLocked(&infoCopy, true /*group*/); err != nil { 247 lastErr = err 248 } 249 tg.signalHandlers.mu.Unlock() 250 } 251 } 252 return lastErr 253 } 254 255 // CreateSession creates a new Session, with the ThreadGroup as the leader. 256 // 257 // EPERM may be returned if either the given ThreadGroup is already a Session 258 // leader, or a ProcessGroup already exists for the ThreadGroup's ID. 259 func (tg *ThreadGroup) CreateSession() error { 260 tg.pidns.owner.mu.Lock() 261 defer tg.pidns.owner.mu.Unlock() 262 tg.signalHandlers.mu.Lock() 263 defer tg.signalHandlers.mu.Unlock() 264 return tg.createSession() 265 } 266 267 // createSession creates a new session for a threadgroup. 268 // 269 // Precondition: callers must hold TaskSet.mu and the signal mutex for writing. 270 func (tg *ThreadGroup) createSession() error { 271 // Get the ID for this thread in the current namespace. 272 id := tg.pidns.tgids[tg] 273 274 // Check if this ThreadGroup already leads a Session, or 275 // if the proposed group is already taken. 276 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 277 if s.leader.pidns != tg.pidns { 278 continue 279 } 280 if s.leader == tg { 281 return linuxerr.EPERM 282 } 283 if s.id == SessionID(id) { 284 return linuxerr.EPERM 285 } 286 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 287 if pg.id == ProcessGroupID(id) { 288 return linuxerr.EPERM 289 } 290 } 291 } 292 293 // Create a new Session, with a single reference. 294 s := &Session{ 295 id: SessionID(id), 296 leader: tg, 297 } 298 s.InitRefs() 299 300 // Create a new ProcessGroup, belonging to that Session. 301 // This also has a single reference (assigned below). 302 // 303 // Note that since this is a new session and a new process group, there 304 // will be zero ancestors for this process group. (It is an orphan at 305 // this point.) 306 pg := &ProcessGroup{ 307 id: ProcessGroupID(id), 308 originator: tg, 309 session: s, 310 ancestors: 0, 311 } 312 pg.refs.InitRefs() 313 314 // Tie them and return the result. 315 s.processGroups.PushBack(pg) 316 tg.pidns.owner.sessions.PushBack(s) 317 318 // Leave the current group, and assign the new one. 319 if tg.processGroup != nil { 320 oldParentPG := tg.parentPG() 321 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 322 childTG.processGroup.incRefWithParent(pg) 323 childTG.processGroup.decRefWithParent(oldParentPG) 324 }) 325 // If tg.processGroup is an orphan, decRefWithParent will lock 326 // the signal mutex of each thread group in tg.processGroup. 327 // However, tg's signal mutex may already be locked at this 328 // point. We change tg's process group before calling 329 // decRefWithParent to avoid locking tg's signal mutex twice. 330 oldPG := tg.processGroup 331 tg.processGroup = pg 332 oldPG.decRefWithParent(oldParentPG) 333 } else { 334 // The current process group may be nil only in the case of an 335 // unparented thread group (i.e. the init process). This would 336 // not normally occur, but we allow it for the convenience of 337 // CreateSession working from that point. There will be no 338 // child processes. We always say that the very first group 339 // created has ancestors (avoids checks elsewhere). 340 // 341 // Note that this mirrors the parent == nil logic in 342 // incRef/decRef/reparent, which counts nil as an ancestor. 343 tg.processGroup = pg 344 tg.processGroup.ancestors++ 345 } 346 347 // Ensure a translation is added to all namespaces. 348 for ns := tg.pidns; ns != nil; ns = ns.parent { 349 local := ns.tgids[tg] 350 ns.sids[s] = SessionID(local) 351 ns.sessions[SessionID(local)] = s 352 ns.pgids[pg] = ProcessGroupID(local) 353 ns.processGroups[ProcessGroupID(local)] = pg 354 } 355 356 // Disconnect from the controlling terminal. 357 tg.tty = nil 358 359 return nil 360 } 361 362 // CreateProcessGroup creates a new process group. 363 // 364 // An EPERM error will be returned if the ThreadGroup belongs to a different 365 // Session, is a Session leader or the group already exists. 366 func (tg *ThreadGroup) CreateProcessGroup() error { 367 tg.pidns.owner.mu.Lock() 368 defer tg.pidns.owner.mu.Unlock() 369 370 // Get the ID for this thread in the current namespace. 371 id := tg.pidns.tgids[tg] 372 373 // Check whether a process still exists or not. 374 if id == 0 { 375 return linuxerr.ESRCH 376 } 377 378 // Per above, check for a Session leader or existing group. 379 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 380 if s.leader.pidns != tg.pidns { 381 continue 382 } 383 if s.leader == tg { 384 return linuxerr.EPERM 385 } 386 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 387 if pg.id == ProcessGroupID(id) { 388 return linuxerr.EPERM 389 } 390 } 391 } 392 393 // Create a new ProcessGroup, belonging to the current Session. 394 // 395 // We manually adjust the ancestors if the parent is in the same 396 // session. 397 tg.processGroup.session.IncRef() 398 pg := ProcessGroup{ 399 id: ProcessGroupID(id), 400 originator: tg, 401 session: tg.processGroup.session, 402 } 403 pg.refs.InitRefs() 404 405 if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session { 406 pg.ancestors++ 407 } 408 409 // Assign the new process group; adjust children. 410 oldParentPG := tg.parentPG() 411 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 412 childTG.processGroup.incRefWithParent(&pg) 413 childTG.processGroup.decRefWithParent(oldParentPG) 414 }) 415 tg.processGroup.decRefWithParent(oldParentPG) 416 tg.processGroup = &pg 417 418 // Add the new process group to the session. 419 pg.session.processGroups.PushBack(&pg) 420 421 // Ensure this translation is added to all namespaces. 422 for ns := tg.pidns; ns != nil; ns = ns.parent { 423 local := ns.tgids[tg] 424 ns.pgids[&pg] = ProcessGroupID(local) 425 ns.processGroups[ProcessGroupID(local)] = &pg 426 } 427 428 return nil 429 } 430 431 // JoinProcessGroup joins an existing process group. 432 // 433 // This function will return EACCES if an exec has been performed since fork 434 // by the given ThreadGroup, and EPERM if the Sessions are not the same or the 435 // group does not exist. 436 // 437 // If checkExec is set, then the join is not permitted after the process has 438 // executed exec at least once. 439 func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error { 440 pidns.owner.mu.Lock() 441 defer pidns.owner.mu.Unlock() 442 443 // Check whether the process still exists or not. 444 if _, ok := pidns.tgids[tg]; !ok { 445 return linuxerr.ESRCH 446 } 447 448 // Lookup the ProcessGroup. 449 pg := pidns.processGroups[pgid] 450 if pg == nil { 451 return linuxerr.EPERM 452 } 453 454 // Disallow the join if an execve has performed, per POSIX. 455 if checkExec && tg.execed { 456 return linuxerr.EACCES 457 } 458 459 // See if it's in the same session as ours. 460 if pg.session != tg.processGroup.session { 461 return linuxerr.EPERM 462 } 463 464 // Join the group; adjust children. 465 parentPG := tg.parentPG() 466 pg.incRefWithParent(parentPG) 467 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 468 childTG.processGroup.incRefWithParent(pg) 469 childTG.processGroup.decRefWithParent(tg.processGroup) 470 }) 471 tg.processGroup.decRefWithParent(parentPG) 472 tg.processGroup = pg 473 474 return nil 475 } 476 477 // Session returns the ThreadGroup's Session. 478 // 479 // A reference is not taken on the session. 480 func (tg *ThreadGroup) Session() *Session { 481 tg.pidns.owner.mu.RLock() 482 defer tg.pidns.owner.mu.RUnlock() 483 return tg.processGroup.session 484 } 485 486 // IDOfSession returns the Session assigned to s in PID namespace ns. 487 // 488 // If this group isn't visible in this namespace, zero will be returned. It is 489 // the callers responsibility to check that before using this function. 490 func (ns *PIDNamespace) IDOfSession(s *Session) SessionID { 491 ns.owner.mu.RLock() 492 defer ns.owner.mu.RUnlock() 493 return ns.sids[s] 494 } 495 496 // SessionWithID returns the Session with the given ID in the PID namespace ns, 497 // or nil if that given ID is not defined in this namespace. 498 // 499 // A reference is not taken on the session. 500 func (ns *PIDNamespace) SessionWithID(id SessionID) *Session { 501 ns.owner.mu.RLock() 502 defer ns.owner.mu.RUnlock() 503 return ns.sessions[id] 504 } 505 506 // ProcessGroup returns the ThreadGroup's ProcessGroup. 507 // 508 // A reference is not taken on the process group. 509 func (tg *ThreadGroup) ProcessGroup() *ProcessGroup { 510 tg.pidns.owner.mu.RLock() 511 defer tg.pidns.owner.mu.RUnlock() 512 return tg.processGroup 513 } 514 515 // IDOfProcessGroup returns the process group assigned to pg in PID namespace ns. 516 // 517 // The same constraints apply as IDOfSession. 518 func (ns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID { 519 ns.owner.mu.RLock() 520 defer ns.owner.mu.RUnlock() 521 return ns.pgids[pg] 522 } 523 524 // ProcessGroupWithID returns the ProcessGroup with the given ID in the PID 525 // namespace ns, or nil if that given ID is not defined in this namespace. 526 // 527 // A reference is not taken on the process group. 528 func (ns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup { 529 ns.owner.mu.RLock() 530 defer ns.owner.mu.RUnlock() 531 return ns.processGroups[id] 532 }