github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/sessions.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 20 "github.com/SagerNet/gvisor/pkg/syserror" 21 ) 22 23 // SessionID is the public identifier. 24 type SessionID ThreadID 25 26 // ProcessGroupID is the public identifier. 27 type ProcessGroupID ThreadID 28 29 // Session contains a leader threadgroup and a list of ProcessGroups. 30 // 31 // +stateify savable 32 type Session struct { 33 SessionRefs 34 35 // leader is the originator of the Session. 36 // 37 // Note that this may no longer be running (and may be reaped), so the 38 // ID is cached upon initial creation. The leader is still required 39 // however, since its PIDNamespace defines the scope of the Session. 40 // 41 // The leader is immutable. 42 leader *ThreadGroup 43 44 // id is the cached identifier in the leader's namespace. 45 // 46 // The id is immutable. 47 id SessionID 48 49 // foreground is the foreground process group. 50 // 51 // This is protected by TaskSet.mu. 52 foreground *ProcessGroup 53 54 // ProcessGroups is a list of process groups in this Session. This is 55 // protected by TaskSet.mu. 56 processGroups processGroupList 57 58 // sessionEntry is the embed for TaskSet.sessions. This is protected by 59 // TaskSet.mu. 60 sessionEntry 61 } 62 63 // DecRef drops a reference. 64 // 65 // Precondition: callers must hold TaskSet.mu for writing. 66 func (s *Session) DecRef() { 67 s.SessionRefs.DecRef(func() { 68 // Remove translations from the leader. 69 for ns := s.leader.pidns; ns != nil; ns = ns.parent { 70 id := ns.sids[s] 71 delete(ns.sids, s) 72 delete(ns.sessions, id) 73 } 74 75 // Remove from the list of global Sessions. 76 s.leader.pidns.owner.sessions.Remove(s) 77 }) 78 } 79 80 // ProcessGroup contains an originator threadgroup and a parent Session. 81 // 82 // +stateify savable 83 type ProcessGroup struct { 84 refs ProcessGroupRefs 85 86 // originator is the originator of the group. 87 // 88 // See note re: leader in Session. The same applies here. 89 // 90 // The originator is immutable. 91 originator *ThreadGroup 92 93 // id is the cached identifier in the originator's namespace. 94 // 95 // The id is immutable. 96 id ProcessGroupID 97 98 // Session is the parent Session. 99 // 100 // The session is immutable. 101 session *Session 102 103 // ancestors is the number of thread groups in this process group whose 104 // parent is in a different process group in the same session. 105 // 106 // The name is derived from the fact that process groups where 107 // ancestors is zero are considered "orphans". 108 // 109 // ancestors is protected by TaskSet.mu. 110 ancestors uint32 111 112 // processGroupEntry is the embedded entry for Sessions.groups. This is 113 // protected by TaskSet.mu. 114 processGroupEntry 115 } 116 117 // Originator retuns the originator of the process group. 118 func (pg *ProcessGroup) Originator() *ThreadGroup { 119 return pg.originator 120 } 121 122 // IsOrphan returns true if this process group is an orphan. 123 func (pg *ProcessGroup) IsOrphan() bool { 124 ts := pg.originator.TaskSet() 125 ts.mu.RLock() 126 defer ts.mu.RUnlock() 127 return pg.ancestors == 0 128 } 129 130 // incRefWithParent grabs a reference. 131 // 132 // This function is called when this ProcessGroup is being associated with some 133 // new ThreadGroup, tg. parentPG is the ProcessGroup of tg's parent 134 // ThreadGroup. If tg is init, then parentPG may be nil. 135 // 136 // Precondition: callers must hold TaskSet.mu for writing. 137 func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) { 138 // We acquire an "ancestor" reference in the case of a nil parent. 139 // This is because the process being associated is init, and init can 140 // never be orphaned (we count it as always having an ancestor). 141 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 142 pg.ancestors++ 143 } 144 145 pg.refs.IncRef() 146 } 147 148 // decRefWithParent drops a reference. 149 // 150 // parentPG is per incRefWithParent. 151 // 152 // Precondition: callers must hold TaskSet.mu for writing. 153 func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { 154 // See incRefWithParent regarding parent == nil. 155 if pg != parentPG && (parentPG == nil || pg.session == parentPG.session) { 156 pg.ancestors-- 157 } 158 159 alive := true 160 pg.refs.DecRef(func() { 161 alive = false // don't bother with handleOrphan. 162 163 // Remove translations from the originator. 164 for ns := pg.originator.pidns; ns != nil; ns = ns.parent { 165 id := ns.pgids[pg] 166 delete(ns.pgids, pg) 167 delete(ns.processGroups, id) 168 } 169 170 // Remove the list of process groups. 171 pg.session.processGroups.Remove(pg) 172 pg.session.DecRef() 173 }) 174 if alive { 175 pg.handleOrphan() 176 } 177 } 178 179 // parentPG returns the parent process group. 180 // 181 // Precondition: callers must hold TaskSet.mu. 182 func (tg *ThreadGroup) parentPG() *ProcessGroup { 183 if tg.leader.parent != nil { 184 return tg.leader.parent.tg.processGroup 185 } 186 return nil 187 } 188 189 // handleOrphan checks whether the process group is an orphan and has any 190 // stopped jobs. If yes, then appropriate signals are delivered to each thread 191 // group within the process group. 192 // 193 // Precondition: callers must hold TaskSet.mu for writing. 194 func (pg *ProcessGroup) handleOrphan() { 195 // Check if this process is an orphan. 196 if pg.ancestors != 0 { 197 return 198 } 199 200 // See if there are any stopped jobs. 201 hasStopped := false 202 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 203 if tg.processGroup != pg { 204 return 205 } 206 tg.signalHandlers.mu.Lock() 207 if tg.groupStopComplete { 208 hasStopped = true 209 } 210 tg.signalHandlers.mu.Unlock() 211 }) 212 if !hasStopped { 213 return 214 } 215 216 // Deliver appropriate signals to all thread groups. 217 pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) { 218 if tg.processGroup != pg { 219 return 220 } 221 tg.signalHandlers.mu.Lock() 222 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */) 223 tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */) 224 tg.signalHandlers.mu.Unlock() 225 }) 226 227 return 228 } 229 230 // Session returns the process group's session without taking a reference. 231 func (pg *ProcessGroup) Session() *Session { 232 return pg.session 233 } 234 235 // SendSignal sends a signal to all processes inside the process group. It is 236 // analagous to kernel/signal.c:kill_pgrp. 237 func (pg *ProcessGroup) SendSignal(info *linux.SignalInfo) error { 238 tasks := pg.originator.TaskSet() 239 tasks.mu.RLock() 240 defer tasks.mu.RUnlock() 241 242 var lastErr error 243 for tg := range tasks.Root.tgids { 244 if tg.processGroup == pg { 245 tg.signalHandlers.mu.Lock() 246 infoCopy := *info 247 if err := tg.leader.sendSignalLocked(&infoCopy, true /*group*/); err != nil { 248 lastErr = err 249 } 250 tg.signalHandlers.mu.Unlock() 251 } 252 } 253 return lastErr 254 } 255 256 // CreateSession creates a new Session, with the ThreadGroup as the leader. 257 // 258 // EPERM may be returned if either the given ThreadGroup is already a Session 259 // leader, or a ProcessGroup already exists for the ThreadGroup's ID. 260 func (tg *ThreadGroup) CreateSession() error { 261 tg.pidns.owner.mu.Lock() 262 defer tg.pidns.owner.mu.Unlock() 263 tg.signalHandlers.mu.Lock() 264 defer tg.signalHandlers.mu.Unlock() 265 return tg.createSession() 266 } 267 268 // createSession creates a new session for a threadgroup. 269 // 270 // Precondition: callers must hold TaskSet.mu and the signal mutex for writing. 271 func (tg *ThreadGroup) createSession() error { 272 // Get the ID for this thread in the current namespace. 273 id := tg.pidns.tgids[tg] 274 275 // Check if this ThreadGroup already leads a Session, or 276 // if the proposed group is already taken. 277 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 278 if s.leader.pidns != tg.pidns { 279 continue 280 } 281 if s.leader == tg { 282 return linuxerr.EPERM 283 } 284 if s.id == SessionID(id) { 285 return linuxerr.EPERM 286 } 287 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 288 if pg.id == ProcessGroupID(id) { 289 return linuxerr.EPERM 290 } 291 } 292 } 293 294 // Create a new Session, with a single reference. 295 s := &Session{ 296 id: SessionID(id), 297 leader: tg, 298 } 299 s.InitRefs() 300 301 // Create a new ProcessGroup, belonging to that Session. 302 // This also has a single reference (assigned below). 303 // 304 // Note that since this is a new session and a new process group, there 305 // will be zero ancestors for this process group. (It is an orphan at 306 // this point.) 307 pg := &ProcessGroup{ 308 id: ProcessGroupID(id), 309 originator: tg, 310 session: s, 311 ancestors: 0, 312 } 313 pg.refs.InitRefs() 314 315 // Tie them and return the result. 316 s.processGroups.PushBack(pg) 317 tg.pidns.owner.sessions.PushBack(s) 318 319 // Leave the current group, and assign the new one. 320 if tg.processGroup != nil { 321 oldParentPG := tg.parentPG() 322 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 323 childTG.processGroup.incRefWithParent(pg) 324 childTG.processGroup.decRefWithParent(oldParentPG) 325 }) 326 // If tg.processGroup is an orphan, decRefWithParent will lock 327 // the signal mutex of each thread group in tg.processGroup. 328 // However, tg's signal mutex may already be locked at this 329 // point. We change tg's process group before calling 330 // decRefWithParent to avoid locking tg's signal mutex twice. 331 oldPG := tg.processGroup 332 tg.processGroup = pg 333 oldPG.decRefWithParent(oldParentPG) 334 } else { 335 // The current process group may be nil only in the case of an 336 // unparented thread group (i.e. the init process). This would 337 // not normally occur, but we allow it for the convenience of 338 // CreateSession working from that point. There will be no 339 // child processes. We always say that the very first group 340 // created has ancestors (avoids checks elsewhere). 341 // 342 // Note that this mirrors the parent == nil logic in 343 // incRef/decRef/reparent, which counts nil as an ancestor. 344 tg.processGroup = pg 345 tg.processGroup.ancestors++ 346 } 347 348 // Ensure a translation is added to all namespaces. 349 for ns := tg.pidns; ns != nil; ns = ns.parent { 350 local := ns.tgids[tg] 351 ns.sids[s] = SessionID(local) 352 ns.sessions[SessionID(local)] = s 353 ns.pgids[pg] = ProcessGroupID(local) 354 ns.processGroups[ProcessGroupID(local)] = pg 355 } 356 357 // Disconnect from the controlling terminal. 358 tg.tty = nil 359 360 return nil 361 } 362 363 // CreateProcessGroup creates a new process group. 364 // 365 // An EPERM error will be returned if the ThreadGroup belongs to a different 366 // Session, is a Session leader or the group already exists. 367 func (tg *ThreadGroup) CreateProcessGroup() error { 368 tg.pidns.owner.mu.Lock() 369 defer tg.pidns.owner.mu.Unlock() 370 371 // Get the ID for this thread in the current namespace. 372 id := tg.pidns.tgids[tg] 373 374 // Check whether a process still exists or not. 375 if id == 0 { 376 return syserror.ESRCH 377 } 378 379 // Per above, check for a Session leader or existing group. 380 for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { 381 if s.leader.pidns != tg.pidns { 382 continue 383 } 384 if s.leader == tg { 385 return linuxerr.EPERM 386 } 387 for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { 388 if pg.id == ProcessGroupID(id) { 389 return linuxerr.EPERM 390 } 391 } 392 } 393 394 // Create a new ProcessGroup, belonging to the current Session. 395 // 396 // We manually adjust the ancestors if the parent is in the same 397 // session. 398 tg.processGroup.session.IncRef() 399 pg := ProcessGroup{ 400 id: ProcessGroupID(id), 401 originator: tg, 402 session: tg.processGroup.session, 403 } 404 pg.refs.InitRefs() 405 406 if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session { 407 pg.ancestors++ 408 } 409 410 // Assign the new process group; adjust children. 411 oldParentPG := tg.parentPG() 412 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 413 childTG.processGroup.incRefWithParent(&pg) 414 childTG.processGroup.decRefWithParent(oldParentPG) 415 }) 416 tg.processGroup.decRefWithParent(oldParentPG) 417 tg.processGroup = &pg 418 419 // Add the new process group to the session. 420 pg.session.processGroups.PushBack(&pg) 421 422 // Ensure this translation is added to all namespaces. 423 for ns := tg.pidns; ns != nil; ns = ns.parent { 424 local := ns.tgids[tg] 425 ns.pgids[&pg] = ProcessGroupID(local) 426 ns.processGroups[ProcessGroupID(local)] = &pg 427 } 428 429 return nil 430 } 431 432 // JoinProcessGroup joins an existing process group. 433 // 434 // This function will return EACCES if an exec has been performed since fork 435 // by the given ThreadGroup, and EPERM if the Sessions are not the same or the 436 // group does not exist. 437 // 438 // If checkExec is set, then the join is not permitted after the process has 439 // executed exec at least once. 440 func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error { 441 pidns.owner.mu.Lock() 442 defer pidns.owner.mu.Unlock() 443 444 // Lookup the ProcessGroup. 445 pg := pidns.processGroups[pgid] 446 if pg == nil { 447 return linuxerr.EPERM 448 } 449 450 // Disallow the join if an execve has performed, per POSIX. 451 if checkExec && tg.execed { 452 return linuxerr.EACCES 453 } 454 455 // See if it's in the same session as ours. 456 if pg.session != tg.processGroup.session { 457 return linuxerr.EPERM 458 } 459 460 // Join the group; adjust children. 461 parentPG := tg.parentPG() 462 pg.incRefWithParent(parentPG) 463 tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { 464 childTG.processGroup.incRefWithParent(pg) 465 childTG.processGroup.decRefWithParent(tg.processGroup) 466 }) 467 tg.processGroup.decRefWithParent(parentPG) 468 tg.processGroup = pg 469 470 return nil 471 } 472 473 // Session returns the ThreadGroup's Session. 474 // 475 // A reference is not taken on the session. 476 func (tg *ThreadGroup) Session() *Session { 477 tg.pidns.owner.mu.RLock() 478 defer tg.pidns.owner.mu.RUnlock() 479 return tg.processGroup.session 480 } 481 482 // IDOfSession returns the Session assigned to s in PID namespace ns. 483 // 484 // If this group isn't visible in this namespace, zero will be returned. It is 485 // the callers responsibility to check that before using this function. 486 func (ns *PIDNamespace) IDOfSession(s *Session) SessionID { 487 ns.owner.mu.RLock() 488 defer ns.owner.mu.RUnlock() 489 return ns.sids[s] 490 } 491 492 // SessionWithID returns the Session with the given ID in the PID namespace ns, 493 // or nil if that given ID is not defined in this namespace. 494 // 495 // A reference is not taken on the session. 496 func (ns *PIDNamespace) SessionWithID(id SessionID) *Session { 497 ns.owner.mu.RLock() 498 defer ns.owner.mu.RUnlock() 499 return ns.sessions[id] 500 } 501 502 // ProcessGroup returns the ThreadGroup's ProcessGroup. 503 // 504 // A reference is not taken on the process group. 505 func (tg *ThreadGroup) ProcessGroup() *ProcessGroup { 506 tg.pidns.owner.mu.RLock() 507 defer tg.pidns.owner.mu.RUnlock() 508 return tg.processGroup 509 } 510 511 // IDOfProcessGroup returns the process group assigned to pg in PID namespace ns. 512 // 513 // The same constraints apply as IDOfSession. 514 func (ns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID { 515 ns.owner.mu.RLock() 516 defer ns.owner.mu.RUnlock() 517 return ns.pgids[pg] 518 } 519 520 // ProcessGroupWithID returns the ProcessGroup with the given ID in the PID 521 // namespace ns, or nil if that given ID is not defined in this namespace. 522 // 523 // A reference is not taken on the process group. 524 func (ns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup { 525 ns.owner.mu.RLock() 526 defer ns.owner.mu.RUnlock() 527 return ns.processGroups[id] 528 }