github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/task_exit.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 // This file implements the task exit cycle: 18 // 19 // - Tasks are asynchronously requested to exit with Task.Kill. 20 // 21 // - When able, the task goroutine enters the exit path starting from state 22 // runExit. 23 // 24 // - Other tasks observe completed exits with Task.Wait (which implements the 25 // wait*() family of syscalls). 26 27 import ( 28 "errors" 29 "fmt" 30 "strconv" 31 32 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 33 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 34 "github.com/MerlinKodo/gvisor/pkg/log" 35 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 36 "github.com/MerlinKodo/gvisor/pkg/sentry/seccheck" 37 pb "github.com/MerlinKodo/gvisor/pkg/sentry/seccheck/points/points_go_proto" 38 "github.com/MerlinKodo/gvisor/pkg/waiter" 39 ) 40 41 // TaskExitState represents a step in the task exit path. 42 // 43 // "Exiting" and "exited" are often ambiguous; prefer to name specific states. 44 type TaskExitState int 45 46 const ( 47 // TaskExitNone indicates that the task has not begun exiting. 
48 TaskExitNone TaskExitState = iota 49 50 // TaskExitInitiated indicates that the task goroutine has entered the exit 51 // path, and the task is no longer eligible to participate in group stops 52 // or group signal handling. TaskExitInitiated is analogous to Linux's 53 // PF_EXITING. 54 TaskExitInitiated 55 56 // TaskExitZombie indicates that the task has released its resources, and 57 // the task no longer prevents a sibling thread from completing execve. 58 TaskExitZombie 59 60 // TaskExitDead indicates that the task's thread IDs have been released, 61 // and the task no longer prevents its thread group leader from being 62 // reaped. ("Reaping" refers to the transitioning of a task from 63 // TaskExitZombie to TaskExitDead.) 64 TaskExitDead 65 ) 66 67 // String implements fmt.Stringer. 68 func (t TaskExitState) String() string { 69 switch t { 70 case TaskExitNone: 71 return "TaskExitNone" 72 case TaskExitInitiated: 73 return "TaskExitInitiated" 74 case TaskExitZombie: 75 return "TaskExitZombie" 76 case TaskExitDead: 77 return "TaskExitDead" 78 default: 79 return strconv.Itoa(int(t)) 80 } 81 } 82 83 // killLocked marks t as killed by enqueueing a SIGKILL, without causing the 84 // thread-group-affecting side effects SIGKILL usually has. 85 // 86 // Preconditions: The signal mutex must be locked. 87 func (t *Task) killLocked() { 88 // Clear killable stops. 89 if t.stop != nil && t.stop.Killable() { 90 t.endInternalStopLocked() 91 } 92 t.pendingSignals.enqueue(&linux.SignalInfo{ 93 Signo: int32(linux.SIGKILL), 94 // Linux just sets SIGKILL in the pending signal bitmask without 95 // enqueueing an actual siginfo, such that 96 // kernel/signal.c:collect_signal() initializes si_code to SI_USER. 97 Code: linux.SI_USER, 98 }, nil) 99 t.interrupt() 100 } 101 102 // killed returns true if t has a SIGKILL pending. killed is analogous to 103 // Linux's fatal_signal_pending(). 104 // 105 // Preconditions: The caller must be running on the task goroutine. 
106 func (t *Task) killed() bool { 107 t.tg.signalHandlers.mu.Lock() 108 defer t.tg.signalHandlers.mu.Unlock() 109 return t.killedLocked() 110 } 111 112 func (t *Task) killedLocked() bool { 113 return t.pendingSignals.pendingSet&linux.SignalSetOf(linux.SIGKILL) != 0 114 } 115 116 // PrepareExit indicates an exit with the given status. 117 // 118 // Preconditions: The caller must be running on the task goroutine. 119 func (t *Task) PrepareExit(ws linux.WaitStatus) { 120 t.tg.pidns.owner.mu.RLock() 121 defer t.tg.pidns.owner.mu.RUnlock() 122 t.tg.signalHandlers.mu.Lock() 123 defer t.tg.signalHandlers.mu.Unlock() 124 125 last := t.tg.activeTasks == 1 126 if last { 127 t.prepareGroupExitLocked(ws) 128 return 129 } 130 131 t.exitStatus = ws 132 } 133 134 // PrepareGroupExit indicates a group exit with status es to t's thread group. 135 // 136 // PrepareGroupExit is analogous to Linux's do_group_exit(), except that it 137 // does not tail-call do_exit(), except that it *does* set Task.exitStatus. 138 // (Linux does not do so until within do_exit(), since it reuses exit_code for 139 // ptrace.) 140 // 141 // Preconditions: The caller must be running on the task goroutine. 142 func (t *Task) PrepareGroupExit(ws linux.WaitStatus) { 143 t.tg.signalHandlers.mu.Lock() 144 defer t.tg.signalHandlers.mu.Unlock() 145 t.prepareGroupExitLocked(ws) 146 } 147 148 // Preconditions: 149 // - The caller must be running on the task goroutine. 150 // - The signal mutex must be locked. 151 func (t *Task) prepareGroupExitLocked(ws linux.WaitStatus) { 152 if t.tg.exiting || t.tg.execing != nil { 153 // Note that if t.tg.exiting is false but t.tg.execing is not nil, i.e. 154 // this "group exit" is being executed by the killed sibling of an 155 // execing task, then Task.Execve never set t.tg.exitStatus, so it's 156 // still the zero value. This is consistent with Linux, both in intent 157 // ("all other threads ... 
report death as if they exited via _exit(2) 158 // with exit code 0" - ptrace(2), "execve under ptrace") and in 159 // implementation (compare fs/exec.c:de_thread() => 160 // kernel/signal.c:zap_other_threads() and 161 // kernel/exit.c:do_group_exit() => 162 // include/linux/sched.h:signal_group_exit()). 163 t.exitStatus = t.tg.exitStatus 164 return 165 } 166 t.tg.exiting = true 167 t.tg.exitStatus = ws 168 t.exitStatus = ws 169 for sibling := t.tg.tasks.Front(); sibling != nil; sibling = sibling.Next() { 170 if sibling != t { 171 sibling.killLocked() 172 } 173 } 174 } 175 176 // Kill requests that all tasks in ts exit as if group exiting with status ws. 177 // Kill does not wait for tasks to exit. 178 // 179 // Kill has no analogue in Linux; it's provided for save/restore only. 180 func (ts *TaskSet) Kill(ws linux.WaitStatus) { 181 ts.mu.Lock() 182 defer ts.mu.Unlock() 183 ts.Root.exiting = true 184 for t := range ts.Root.tids { 185 t.tg.signalHandlers.mu.Lock() 186 if !t.tg.exiting { 187 t.tg.exiting = true 188 t.tg.exitStatus = ws 189 } 190 t.killLocked() 191 t.tg.signalHandlers.mu.Unlock() 192 } 193 } 194 195 // advanceExitStateLocked checks that t's current exit state is oldExit, then 196 // sets it to newExit. If t's current exit state is not oldExit, 197 // advanceExitStateLocked panics. 198 // 199 // Preconditions: The TaskSet mutex must be locked. 200 func (t *Task) advanceExitStateLocked(oldExit, newExit TaskExitState) { 201 if t.exitState != oldExit { 202 panic(fmt.Sprintf("Transitioning from exit state %v to %v: unexpected preceding state %v", oldExit, newExit, t.exitState)) 203 } 204 t.Debugf("Transitioning from exit state %v to %v", oldExit, newExit) 205 t.exitState = newExit 206 } 207 208 // runExit is the entry point into the task exit path. 
209 // 210 // +stateify savable 211 type runExit struct{} 212 213 func (*runExit) execute(t *Task) taskRunState { 214 t.ptraceExit() 215 return (*runExitMain)(nil) 216 } 217 218 // +stateify savable 219 type runExitMain struct{} 220 221 func (*runExitMain) execute(t *Task) taskRunState { 222 t.traceExitEvent() 223 224 if seccheck.Global.Enabled(seccheck.PointTaskExit) { 225 info := &pb.TaskExit{ 226 ExitStatus: int32(t.tg.exitStatus), 227 } 228 fields := seccheck.Global.GetFieldSet(seccheck.PointTaskExit) 229 if !fields.Context.Empty() { 230 info.ContextData = &pb.ContextData{} 231 LoadSeccheckData(t, fields.Context, info.ContextData) 232 } 233 seccheck.Global.SentToSinks(func(c seccheck.Sink) error { 234 return c.TaskExit(t, fields, info) 235 }) 236 } 237 238 lastExiter := t.exitThreadGroup() 239 240 t.ResetKcov() 241 242 // If the task has a cleartid, and the thread group wasn't killed by a 243 // signal, handle that before releasing the MM. 244 if t.cleartid != 0 { 245 t.tg.signalHandlers.mu.Lock() 246 signaled := t.tg.exiting && t.tg.exitStatus.Signaled() 247 t.tg.signalHandlers.mu.Unlock() 248 if !signaled { 249 zero := ThreadID(0) 250 if _, err := zero.CopyOut(t, t.cleartid); err == nil { 251 t.Futex().Wake(t, t.cleartid, false, ^uint32(0), 1) 252 } 253 // If the CopyOut fails, there's nothing we can do. 254 } 255 } 256 257 // Handle the robust futex list. 258 t.exitRobustList() 259 260 // Deactivate the address space and update max RSS before releasing the 261 // task's MM. 262 t.Deactivate() 263 t.tg.pidns.owner.mu.Lock() 264 t.updateRSSLocked() 265 t.tg.pidns.owner.mu.Unlock() 266 267 // Release the task image resources. Accessing these fields must be 268 // done with t.mu held, but the mm.DecUsers() call must be done outside 269 // of that lock. 270 t.mu.Lock() 271 mm := t.image.MemoryManager 272 t.image.MemoryManager = nil 273 t.image.fu = nil 274 t.mu.Unlock() 275 mm.DecUsers(t) 276 277 // Releasing the MM unblocks a blocked CLONE_VFORK parent. 
278 t.unstopVforkParent() 279 280 t.fsContext.DecRef(t) 281 t.fdTable.DecRef(t) 282 283 // Detach task from all cgroups. This must happen before potentially the 284 // last ref to the cgroupfs mount is dropped below. 285 t.LeaveCgroups() 286 287 t.mu.Lock() 288 mntns := t.mountNamespace 289 t.mountNamespace = nil 290 utsns := t.utsns 291 t.utsns = nil 292 ipcns := t.ipcns 293 t.ipcns = nil 294 netns := t.netns 295 t.netns = nil 296 t.mu.Unlock() 297 mntns.DecRef(t) 298 utsns.DecRef(t) 299 ipcns.DecRef(t) 300 netns.DecRef(t) 301 302 // If this is the last task to exit from the thread group, release the 303 // thread group's resources. 304 if lastExiter { 305 t.tg.Release(t) 306 } 307 308 // Detach tracees. 309 t.exitPtrace() 310 311 // Reparent the task's children. 312 t.exitChildren() 313 314 // Don't tail-call runExitNotify, as exitChildren may have initiated a stop 315 // to wait for a PID namespace to die. 316 return (*runExitNotify)(nil) 317 } 318 319 // exitThreadGroup transitions t to TaskExitInitiated, indicating to t's thread 320 // group that it is no longer eligible to participate in group activities. It 321 // returns true if t is the last task in its thread group to call 322 // exitThreadGroup. 323 func (t *Task) exitThreadGroup() bool { 324 t.tg.pidns.owner.mu.Lock() 325 defer t.tg.pidns.owner.mu.Unlock() 326 t.tg.signalHandlers.mu.Lock() 327 // Can't defer unlock: see below. 328 329 t.advanceExitStateLocked(TaskExitNone, TaskExitInitiated) 330 t.tg.activeTasks-- 331 last := t.tg.activeTasks == 0 332 333 // Ensure that someone will handle the signals we can't. 334 t.setSignalMaskLocked(^linux.SignalSet(0)) 335 336 // Check if this task's exit interacts with an initiated group stop. 337 if !t.groupStopPending { 338 t.tg.signalHandlers.mu.Unlock() 339 return last 340 } 341 t.groupStopPending = false 342 sig := t.tg.groupStopSignal 343 notifyParent := t.participateGroupStopLocked() 344 // signalStop must be called with t's signal mutex unlocked. 
345 t.tg.signalHandlers.mu.Unlock() 346 if notifyParent && t.tg.leader.parent != nil { 347 t.tg.leader.parent.signalStop(t, linux.CLD_STOPPED, int32(sig)) 348 t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop) 349 } 350 return last 351 } 352 353 func (t *Task) exitChildren() { 354 t.tg.pidns.owner.mu.Lock() 355 defer t.tg.pidns.owner.mu.Unlock() 356 newParent := t.findReparentTargetLocked() 357 if newParent == nil { 358 // "If the init process of a PID namespace terminates, the kernel 359 // terminates all of the processes in the namespace via a SIGKILL 360 // signal." - pid_namespaces(7) 361 t.Debugf("Init process terminating, killing namespace") 362 t.tg.pidns.exiting = true 363 for other := range t.tg.pidns.tgids { 364 if other == t.tg { 365 continue 366 } 367 other.signalHandlers.mu.Lock() 368 other.leader.sendSignalLocked(&linux.SignalInfo{ 369 Signo: int32(linux.SIGKILL), 370 }, true /* group */) 371 other.signalHandlers.mu.Unlock() 372 } 373 // TODO(b/37722272): The init process waits for all processes in the 374 // namespace to exit before completing its own exit 375 // (kernel/pid_namespace.c:zap_pid_ns_processes()). Stop until all 376 // other tasks in the namespace are dead, except possibly for this 377 // thread group's leader (which can't be reaped until this task exits). 378 } 379 // This is correct even if newParent is nil (it ensures that children don't 380 // wait for a parent to reap them.) 
381 for c := range t.children { 382 if sig := c.ParentDeathSignal(); sig != 0 { 383 siginfo := &linux.SignalInfo{ 384 Signo: int32(sig), 385 Code: linux.SI_USER, 386 } 387 siginfo.SetPID(int32(c.tg.pidns.tids[t])) 388 siginfo.SetUID(int32(t.Credentials().RealKUID.In(c.UserNamespace()).OrOverflow())) 389 c.tg.signalHandlers.mu.Lock() 390 c.sendSignalLocked(siginfo, true /* group */) 391 c.tg.signalHandlers.mu.Unlock() 392 } 393 c.reparentLocked(newParent) 394 if newParent != nil { 395 newParent.children[c] = struct{}{} 396 } 397 } 398 } 399 400 // findReparentTargetLocked returns the task to which t's children should be 401 // reparented. If no such task exists, findNewParentLocked returns nil. 402 // 403 // This corresponds to Linux's find_new_reaper(). 404 // 405 // Preconditions: The TaskSet mutex must be locked. 406 func (t *Task) findReparentTargetLocked() *Task { 407 // Reparent to any sibling in the same thread group that hasn't begun 408 // exiting. 409 if t2 := t.tg.anyNonExitingTaskLocked(); t2 != nil { 410 return t2 411 } 412 413 if !t.tg.hasChildSubreaper { 414 // No child subreaper exists. We can immediately return the 415 // init process in this PID namespace if it exists. 416 if init := t.tg.pidns.tasks[initTID]; init != nil { 417 return init.tg.anyNonExitingTaskLocked() 418 } 419 return nil 420 } 421 422 // Walk up the process tree until we either find a subreaper, or we hit 423 // the init process in the PID namespace. 424 for parent := t.parent; parent != nil; parent = parent.parent { 425 if parent.tg.isInitInLocked(parent.PIDNamespace()) { 426 // We found the init process for this pid namespace, 427 // return a task from it. If the init process is 428 // exiting, this might return nil. 429 return parent.tg.anyNonExitingTaskLocked() 430 } 431 if parent.tg.isChildSubreaper { 432 // We found a subreaper process. Return a non-exiting 433 // task if there is one, otherwise keep walking up the 434 // process tree. 
435 if target := parent.tg.anyNonExitingTaskLocked(); target != nil { 436 return target 437 } 438 } 439 } 440 441 return nil 442 } 443 444 func (tg *ThreadGroup) anyNonExitingTaskLocked() *Task { 445 for t := tg.tasks.Front(); t != nil; t = t.Next() { 446 if t.exitState == TaskExitNone { 447 return t 448 } 449 } 450 return nil 451 } 452 453 // reparentLocked changes t's parent. The new parent may be nil. 454 // 455 // Preconditions: The TaskSet mutex must be locked for writing. 456 func (t *Task) reparentLocked(parent *Task) { 457 oldParent := t.parent 458 t.parent = parent 459 if oldParent != nil { 460 delete(oldParent.children, t) 461 } 462 if parent != nil { 463 parent.children[t] = struct{}{} 464 } 465 // If a thread group leader's parent changes, reset the thread group's 466 // termination signal to SIGCHLD and re-check exit notification. (Compare 467 // kernel/exit.c:reparent_leader().) 468 if t != t.tg.leader { 469 return 470 } 471 if oldParent == nil && parent == nil { 472 return 473 } 474 if oldParent != nil && parent != nil && oldParent.tg == parent.tg { 475 return 476 } 477 t.tg.terminationSignal = linux.SIGCHLD 478 if t.exitParentNotified && !t.exitParentAcked { 479 t.exitParentNotified = false 480 t.exitNotifyLocked(false) 481 } 482 } 483 484 // When a task exits, other tasks in the system, notably the task's parent and 485 // ptracer, may want to be notified. The exit notification system ensures that 486 // interested tasks receive signals and/or are woken from blocking calls to 487 // wait*() syscalls; these notifications must be resolved before exiting tasks 488 // can be reaped and disappear from the system. 489 // 490 // Each task may have a parent task and/or a tracer task. If both a parent and 491 // a tracer exist, they may be the same task, different tasks in the same 492 // thread group, or tasks in different thread groups. 
// (In the last case, Linux
// refers to the task as being ptrace-reparented due to an implementation
// detail; we avoid this terminology to avoid confusion.)
//
// A thread group is *empty* if all non-leader tasks in the thread group are
// dead, and the leader is either a zombie or dead. The exit of a thread group
// leader is never waitable - by either the parent or tracer - until the thread
// group is empty.
//
// There are a few ways for an exit notification to be resolved:
//
//   - The exit notification may be acknowledged by a call to Task.Wait with
//     WaitOptions.ConsumeEvent set (e.g. due to a wait4() syscall).
//
//   - If the notified party is the parent, and the parent thread group is not
//     also the tracer thread group, and the notification signal is SIGCHLD, the
//     parent may explicitly ignore the notification (see quote in
//     exitNotifyLocked). Note that it's possible for the notified party to
//     ignore the signal in other cases, but the notification is only resolved
//     under the above conditions. (Actually, there is one exception; see the
//     last paragraph of the "leader, has tracer, tracer thread group is parent
//     thread group" case below.)
//
//   - If the notified party is the parent, and the parent does not exist, the
//     notification is resolved as if ignored. (This is only possible in the
//     sentry. In Linux, the only task / thread group without a parent is global
//     init, and killing global init causes a kernel panic.)
//
//   - If the notified party is a tracer, the tracer may detach the traced task.
//     (Zombie tasks cannot be ptrace-attached, so the reverse is not possible.)
//
// In addition, if the notified party is the parent, the parent may exit and
// cause the notifying task to be reparented to another thread group. This does
// not resolve the notification; instead, the notification must be resent to
// the new parent.
//
// The series of notifications generated for a given task's exit depends on
// whether it is a thread group leader; whether the task is ptraced; and, if
// so, whether the tracer thread group is the same as the parent thread group.
//
//   - Non-leader, no tracer: No notification is generated; the task is reaped
//     immediately.
//
//   - Non-leader, has tracer: SIGCHLD is sent to the tracer. When the tracer
//     notification is resolved (by waiting or detaching), the task is reaped. (For
//     non-leaders, whether the tracer and parent thread groups are the same is
//     irrelevant.)
//
//   - Leader, no tracer: The task remains a zombie, with no notification sent,
//     until all other tasks in the thread group are dead. (In Linux terms, this
//     condition is indicated by include/linux/sched.h:thread_group_empty(); tasks
//     are removed from their thread_group list in kernel/exit.c:release_task() =>
//     __exit_signal() => __unhash_process().) Then the thread group's termination
//     signal is sent to the parent. When the parent notification is resolved (by
//     waiting or ignoring), the task is reaped.
//
//   - Leader, has tracer, tracer thread group is not parent thread group:
//     SIGCHLD is sent to the tracer. When the tracer notification is resolved (by
//     waiting or detaching), and all other tasks in the thread group are dead, the
//     thread group's termination signal is sent to the parent. (Note that the
//     tracer cannot resolve the exit notification by waiting until the thread
//     group is empty.) When the parent notification is resolved, the task is
//     reaped.
//
//   - Leader, has tracer, tracer thread group is parent thread group:
//
// If all other tasks in the thread group are dead, the thread group's
// termination signal is sent to the parent. At this point, the notification
// can only be resolved by waiting.
// If the parent detaches from the task as a
// tracer, the notification is not resolved, but the notification can now be
// resolved by waiting or ignoring. When the parent notification is resolved,
// the task is reaped.
//
// If at least one task in the thread group is not dead, SIGCHLD is sent to the
// parent. At this point, the notification cannot be resolved at all; once the
// thread group becomes empty, it can be resolved only by waiting. If the
// parent detaches from the task as a tracer before all remaining tasks die,
// then exit notification proceeds as in the case where the leader never had a
// tracer. If the parent detaches from the task as a tracer after all remaining
// tasks die, the notification is not resolved, but the notification can now be
// resolved by waiting or ignoring. When the parent notification is resolved,
// the task is reaped.
//
// In both of the above cases, when the parent detaches from the task as a
// tracer while the thread group is empty, whether or not the parent resolves
// the notification by ignoring it is based on the parent's SIGCHLD signal
// action, whether or not the thread group's termination signal is SIGCHLD
// (Linux: kernel/ptrace.c:__ptrace_detach() => ignoring_children()).
//
// There is one final wrinkle: A leader can become a non-leader due to a
// sibling execve. In this case, the execing thread detaches the leader's
// tracer (if one exists) and reaps the leader immediately. In Linux, this is
// in fs/exec.c:de_thread(); in the sentry, this is in Task.promoteLocked().

// +stateify savable
type runExitNotify struct{}

// execute moves t from TaskExitInitiated to TaskExitZombie, decrements the
// thread group's live task count, ends an execing sibling's execStop if t was
// the last other live task, and then runs exit notification for t. It returns
// nil, after which the task goroutine exits.
func (*runExitNotify) execute(t *Task) taskRunState {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	t.advanceExitStateLocked(TaskExitInitiated, TaskExitZombie)
	t.tg.liveTasks--
	// Check if this completes a sibling's execve.
	if t.tg.execing != nil && t.tg.liveTasks == 1 {
		// execing blocks the addition of new tasks to the thread group, so
		// the sole living task must be the execing one.
		e := t.tg.execing
		e.tg.signalHandlers.mu.Lock()
		if _, ok := e.stop.(*execStop); ok {
			e.endInternalStopLocked()
		}
		e.tg.signalHandlers.mu.Unlock()
	}
	t.exitNotifyLocked(false)
	// The task goroutine will now exit.
	return nil
}

// exitNotifyLocked is called after changes to t's state that affect exit
// notification. It does nothing unless t is in TaskExitZombie.
//
// If fromPtraceDetach is true, the caller is ptraceDetach or exitPtrace;
// thanks to Linux's haphazard implementation of this functionality, such cases
// determine whether parent notifications are ignored based on the parent's
// handling of SIGCHLD, regardless of what the exited task's thread group's
// termination signal is.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
	if t.exitState != TaskExitZombie {
		return
	}
	// Step 1: generate the tracer notification, at most once per task.
	if !t.exitTracerNotified {
		t.exitTracerNotified = true
		tracer := t.Tracer()
		if tracer == nil {
			t.exitTracerAcked = true
		} else if t != t.tg.leader || t.parent == nil || tracer.tg != t.parent.tg {
			// Don't set exitParentNotified if t is non-leader, even if the
			// tracer is in the parent thread group, so that if the parent
			// detaches the following call to exitNotifyLocked passes through
			// the !exitParentNotified case below and causes t to be reaped
			// immediately.
			//
			// Tracer notification doesn't care about
			// SIG_IGN/SA_NOCLDWAIT.
			tracer.tg.signalHandlers.mu.Lock()
			tracer.sendSignalLocked(t.exitNotificationSignal(linux.SIGCHLD, tracer), true /* group */)
			tracer.tg.signalHandlers.mu.Unlock()
			// Wake EventTraceeStop waiters as well since this task will never
			// ptrace-stop again.
			tracer.tg.eventQueue.Notify(EventExit | EventTraceeStop)
		} else {
			// t is a leader and the tracer is in the parent thread group.
			t.exitParentNotified = true
			sig := linux.SIGCHLD
			if t.tg.tasksCount == 1 {
				sig = t.tg.terminationSignal
			}
			// This notification doesn't care about SIG_IGN/SA_NOCLDWAIT either
			// (in Linux, the check in do_notify_parent() is gated by
			// !tsk->ptrace.)
			t.parent.tg.signalHandlers.mu.Lock()
			t.parent.sendSignalLocked(t.exitNotificationSignal(sig, t.parent), true /* group */)
			t.parent.tg.signalHandlers.mu.Unlock()
			// See below for rationale for this event mask.
			t.parent.tg.eventQueue.Notify(EventExit | EventChildGroupStop | EventGroupContinue)
		}
	}
	// Step 2: once the tracer notification is resolved, generate the parent
	// notification if it has not been generated yet.
	if t.exitTracerAcked && !t.exitParentNotified {
		if t != t.tg.leader {
			t.exitParentNotified = true
			t.exitParentAcked = true
		} else if t.tg.tasksCount == 1 {
			t.exitParentNotified = true
			if t.parent == nil {
				t.exitParentAcked = true
			} else {
				// "POSIX.1-2001 specifies that if the disposition of SIGCHLD is
				// set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see
				// sigaction(2)), then children that terminate do not become
				// zombies and a call to wait() or waitpid() will block until all
				// children have terminated, and then fail with errno set to
				// ECHILD. (The original POSIX standard left the behavior of
				// setting SIGCHLD to SIG_IGN unspecified. Note that even though
				// the default disposition of SIGCHLD is "ignore", explicitly
				// setting the disposition to SIG_IGN results in different
				// treatment of zombie process children.) Linux 2.6 conforms to
				// this specification." - wait(2)
				//
				// Some undocumented Linux-specific details:
				//
				// - All of the above is ignored if the termination signal isn't
				// SIGCHLD.
				//
				// - SA_NOCLDWAIT causes the leader to be immediately reaped, but
				// does not suppress the SIGCHLD.
				signalParent := t.tg.terminationSignal.IsValid()
				t.parent.tg.signalHandlers.mu.Lock()
				if t.tg.terminationSignal == linux.SIGCHLD || fromPtraceDetach {
					if act, ok := t.parent.tg.signalHandlers.actions[linux.SIGCHLD]; ok {
						if act.Handler == linux.SIG_IGN {
							t.exitParentAcked = true
							signalParent = false
						} else if act.Flags&linux.SA_NOCLDWAIT != 0 {
							t.exitParentAcked = true
						}
					}
				}
				if signalParent {
					t.parent.tg.leader.sendSignalLocked(t.exitNotificationSignal(t.tg.terminationSignal, t.parent), true /* group */)
				}
				t.parent.tg.signalHandlers.mu.Unlock()
				// If a task in the parent was waiting for a child group stop
				// or continue, it needs to be notified of the exit, because
				// there may be no remaining eligible tasks (so that wait
				// should return ECHILD).
				t.parent.tg.eventQueue.Notify(EventExit | EventChildGroupStop | EventGroupContinue)
			}

			// We don't send exit events for the root process because we don't send
			// Clone or Exec events for the initial process.
			if t.tg != t.k.globalInit && seccheck.Global.Enabled(seccheck.PointExitNotifyParent) {
				mask, info := getExitNotifyParentSeccheckInfo(t)
				if err := seccheck.Global.SentToSinks(func(c seccheck.Sink) error {
					return c.ExitNotifyParent(t, mask, info)
				}); err != nil {
					log.Infof("Ignoring error from ExitNotifyParent point: %v", err)
				}
			}
		}
	}
	// Step 3: with both notifications resolved, reap t.
	if t.exitTracerAcked && t.exitParentAcked {
		t.advanceExitStateLocked(TaskExitZombie, TaskExitDead)
		for ns := t.tg.pidns; ns != nil; ns = ns.parent {
			ns.deleteTask(t)
		}
		t.userCounters.decRLimitNProc()
		t.tg.exitedCPUStats.Accumulate(t.CPUStats())
		t.tg.ioUsage.Accumulate(t.ioUsage)
		t.tg.signalHandlers.mu.Lock()
		t.tg.tasks.Remove(t)
		t.tg.tasksCount--
		tc := t.tg.tasksCount
		t.tg.signalHandlers.mu.Unlock()
		if tc == 1 && t != t.tg.leader {
			// Our fromPtraceDetach doesn't matter here (in Linux terms, this
			// is via a call to release_task()).
			t.tg.leader.exitNotifyLocked(false)
		} else if tc == 0 {
			t.tg.pidWithinNS.Store(0)
			t.tg.processGroup.decRefWithParent(t.tg.parentPG())
		}
		if t.parent != nil {
			delete(t.parent.children, t)
			// Do not clear t.parent. It may still be needed after the task has exited
			// (for example, to perform ptrace access checks on /proc/[pid] files).
		}
	}
}

// exitNotificationSignal builds the SignalInfo used to tell receiver about
// t's exit. The PID is t's thread ID in receiver's PID namespace and the UID
// is t's real UID mapped into receiver's user namespace. The code and status
// are CLD_KILLED with the terminating signal if t's exit status is signaled,
// and CLD_EXITED with the exit status otherwise.
//
// Preconditions: The TaskSet mutex must be locked.
func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.SignalInfo {
	info := &linux.SignalInfo{
		Signo: int32(sig),
	}
	info.SetPID(int32(receiver.tg.pidns.tids[t]))
	info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
	if t.exitStatus.Signaled() {
		info.Code = linux.CLD_KILLED
		info.SetStatus(int32(t.exitStatus.TerminationSignal()))
	} else {
		info.Code = linux.CLD_EXITED
		info.SetStatus(int32(t.exitStatus.ExitStatus()))
	}
	// TODO(b/72102453): Set utime, stime.
	return info
}

// getExitNotifyParentSeccheckInfo returns the field set configured for the
// ExitNotifyParent seccheck point together with the payload for t: the thread
// group's exit status and, if any context fields are requested, t's context
// data.
//
// Preconditions: The TaskSet mutex must be locked.
func getExitNotifyParentSeccheckInfo(t *Task) (seccheck.FieldSet, *pb.ExitNotifyParentInfo) {
	fields := seccheck.Global.GetFieldSet(seccheck.PointExitNotifyParent)

	info := &pb.ExitNotifyParentInfo{
		ExitStatus: int32(t.tg.exitStatus),
	}
	if !fields.Context.Empty() {
		info.ContextData = &pb.ContextData{}
		// cwd isn't used for notifyExit seccheck so it's ok to pass an empty
		// string.
		LoadSeccheckDataLocked(t, fields.Context, info.ContextData, "")
	}

	return fields, info
}

// ExitStatus returns t's exit status, which is only guaranteed to be
// meaningful if t.ExitState() != TaskExitNone.
func (t *Task) ExitStatus() linux.WaitStatus {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	return t.exitStatus
}

// ExitStatus returns the exit status that would be returned by a consuming
// wait*() on tg.
func (tg *ThreadGroup) ExitStatus() linux.WaitStatus {
	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()
	// A group exit reports the group's status; otherwise the leader's own
	// exit status is used.
	if tg.exiting {
		return tg.exitStatus
	}
	return tg.leader.exitStatus
}

// TerminationSignal returns the thread group's termination signal, which is
// the signal that will be sent to its leader's parent when all threads have
// exited.
func (tg *ThreadGroup) TerminationSignal() linux.Signal {
	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()
	return tg.terminationSignal
}

// Task events that can be waited for.
const (
	// EventExit represents an exit notification generated for a child thread
	// group leader or a tracee under the conditions specified in the comment
	// above runExitNotify.
	EventExit waiter.EventMask = 1 << iota

	// EventChildGroupStop occurs when a child thread group completes a group
	// stop (i.e. all tasks in the child thread group have entered a stopped
	// state as a result of a group stop).
	EventChildGroupStop

	// EventTraceeStop occurs when a task that is ptraced by a task in the
	// notified thread group enters a ptrace stop (see ptrace(2)).
	EventTraceeStop

	// EventGroupContinue occurs when a child thread group, or a thread group
	// whose leader is ptraced by a task in the notified thread group, that had
	// initiated or completed a group stop leaves the group stop, due to the
	// child thread group or any task in the child thread group being sent
	// SIGCONT.
	EventGroupContinue
)

// WaitOptions controls the behavior of Task.Wait.
type WaitOptions struct {
	// If SpecificTID is non-zero, only events from the task with thread ID
	// SpecificTID are eligible to be waited for. SpecificTID is resolved in
	// the PID namespace of the waiter (the method receiver of Task.Wait). If
	// no such task exists, or that task would not otherwise be eligible to be
	// waited for by the waiting task, then there are no waitable tasks and
	// Wait will return ECHILD.
	SpecificTID ThreadID

	// If SpecificPGID is non-zero, only events from ThreadGroups with a
	// matching ProcessGroupID are eligible to be waited for. (Same
	// constraints as SpecificTID apply.)
	SpecificPGID ProcessGroupID

	// Terminology note: Per waitpid(2), "a clone child is one which delivers
	// no signal, or a signal other than SIGCHLD to its parent upon
	// termination." In Linux, termination signal is technically a per-task
	// property rather than a per-thread-group property. However, clone()
	// forces no termination signal for tasks created with CLONE_THREAD, and
	// execve() resets the termination signal to SIGCHLD, so all
	// non-group-leader threads have no termination signal and are therefore
	// "clone tasks".

	// If NonCloneTasks is true, events from non-clone tasks are eligible to be
	// waited for.
	NonCloneTasks bool

	// If CloneTasks is true, events from clone tasks are eligible to be waited
	// for.
	CloneTasks bool

	// If SiblingChildren is true, events from children tasks of any task
	// in the thread group of the waiter are eligible to be waited for.
	SiblingChildren bool

	// Events is a bitwise combination of the events defined above that specify
	// what events are of interest to the call to Wait.
	Events waiter.EventMask

	// If ConsumeEvent is true, the Wait should consume the event such that it
	// cannot be returned by a future Wait. Note that if a task exit is
	// consumed in this way, in most cases the task will be reaped.
	ConsumeEvent bool

	// If BlockInterruptErr is not nil, Wait will block until either an event
	// is available or there are no tasks that could produce a waitable event;
	// if that blocking is interrupted, Wait returns BlockInterruptErr. If
	// BlockInterruptErr is nil, Wait will not block.
	BlockInterruptErr error
}

// matchesTask reports whether events from t are eligible under o. Thread and
// process group IDs are resolved in pidns. tracee indicates that t is being
// considered as a tracee, in which case the clone/non-clone filters are not
// applied.
//
// Preconditions: The TaskSet mutex must be locked (for reading or writing).
func (o *WaitOptions) matchesTask(t *Task, pidns *PIDNamespace, tracee bool) bool {
	if o.SpecificTID != 0 && o.SpecificTID != pidns.tids[t] {
		return false
	}
	if o.SpecificPGID != 0 && o.SpecificPGID != pidns.pgids[t.tg.processGroup] {
		return false
	}
	// Tracees are always eligible.
	if tracee {
		return true
	}
	// A leader whose thread group's termination signal is SIGCHLD is a
	// non-clone task; everything else is a clone task.
	if t == t.tg.leader && t.tg.terminationSignal == linux.SIGCHLD {
		return o.NonCloneTasks
	}
	return o.CloneTasks
}

// ErrNoWaitableEvent is returned by non-blocking Task.Waits (e.g.
// waitpid(WNOHANG)) that find no waitable events, but determine that waitable
// events may exist in the future. (In contrast, if a non-blocking or blocking
// Wait determines that there are no tasks that can produce a waitable event,
// Task.Wait returns ECHILD.)
var ErrNoWaitableEvent = errors.New("non-blocking Wait found eligible threads but no waitable events")

// WaitResult contains information about a waited-for event.
type WaitResult struct {
	// Task is the task that reported the event.
	Task *Task

	// TID is the thread ID of Task in the PID namespace of the task that
	// called Wait (that is, the method receiver of the call to Task.Wait). TID
	// is provided because consuming exit waits cause the thread ID to be
	// deallocated.
	TID ThreadID

	// UID is the real UID of Task in the user namespace of the task that
	// called Wait.
	UID auth.UID

	// Event is exactly one of the events defined above.
	Event waiter.EventMask

	// Status is the wait status associated with the event.
	Status linux.WaitStatus
}

// Wait waits for an event from a thread group that is a child of t's thread
// group, or a task in such a thread group, or a task that is ptraced by t,
// subject to the options specified in opts.
func (t *Task) Wait(opts *WaitOptions) (*WaitResult, error) {
	if opts.BlockInterruptErr == nil {
		return t.waitOnce(opts)
	}
	w, ch := waiter.NewChannelEntry(opts.Events)
	t.tg.eventQueue.EventRegister(&w)
	defer t.tg.eventQueue.EventUnregister(&w)
	for {
		wr, err := t.waitOnce(opts)
		if err != ErrNoWaitableEvent {
			// This includes err == nil.
956 return wr, err 957 } 958 if err := t.Block(ch); err != nil { 959 return wr, linuxerr.ConvertIntr(err, opts.BlockInterruptErr) 960 } 961 } 962 } 963 964 func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) { 965 anyWaitableTasks := false 966 967 t.tg.pidns.owner.mu.Lock() 968 defer t.tg.pidns.owner.mu.Unlock() 969 970 if opts.SiblingChildren { 971 // We can wait on the children and tracees of any task in the 972 // same thread group. 973 for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() { 974 wr, any := t.waitParentLocked(opts, parent) 975 if wr != nil { 976 return wr, nil 977 } 978 anyWaitableTasks = anyWaitableTasks || any 979 } 980 } else { 981 // We can only wait on this task. 982 var wr *WaitResult 983 wr, anyWaitableTasks = t.waitParentLocked(opts, t) 984 if wr != nil { 985 return wr, nil 986 } 987 } 988 989 if anyWaitableTasks { 990 return nil, ErrNoWaitableEvent 991 } 992 return nil, linuxerr.ECHILD 993 } 994 995 // Preconditions: The TaskSet mutex must be locked for writing. 996 func (t *Task) waitParentLocked(opts *WaitOptions, parent *Task) (*WaitResult, bool) { 997 anyWaitableTasks := false 998 999 for child := range parent.children { 1000 if !opts.matchesTask(child, parent.tg.pidns, false) { 1001 continue 1002 } 1003 // Non-leaders don't notify parents on exit and aren't eligible to 1004 // be waited on. 1005 if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked { 1006 anyWaitableTasks = true 1007 if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil { 1008 return wr, anyWaitableTasks 1009 } 1010 } 1011 // Check for group stops and continues. Tasks that have passed 1012 // TaskExitInitiated can no longer participate in group stops. 
1013 if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 { 1014 continue 1015 } 1016 if child.exitState >= TaskExitInitiated { 1017 continue 1018 } 1019 // If the waiter is in the same thread group as the task's 1020 // tracer, do not report its group stops; they will be reported 1021 // as ptrace stops instead. This also skips checking for group 1022 // continues, but they'll be checked for when scanning tracees 1023 // below. (Per kernel/exit.c:wait_consider_task(): "If a 1024 // ptracer wants to distinguish the two events for its own 1025 // children, it should create a separate process which takes 1026 // the role of real parent.") 1027 if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg { 1028 continue 1029 } 1030 anyWaitableTasks = true 1031 if opts.Events&EventChildGroupStop != 0 { 1032 if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil { 1033 return wr, anyWaitableTasks 1034 } 1035 } 1036 if opts.Events&EventGroupContinue != 0 { 1037 if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil { 1038 return wr, anyWaitableTasks 1039 } 1040 } 1041 } 1042 for tracee := range parent.ptraceTracees { 1043 if !opts.matchesTask(tracee, parent.tg.pidns, true) { 1044 continue 1045 } 1046 // Non-leaders do notify tracers on exit. 
1047 if opts.Events&EventExit != 0 && !tracee.exitTracerAcked { 1048 anyWaitableTasks = true 1049 if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil { 1050 return wr, anyWaitableTasks 1051 } 1052 } 1053 if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 { 1054 continue 1055 } 1056 if tracee.exitState >= TaskExitInitiated { 1057 continue 1058 } 1059 anyWaitableTasks = true 1060 if opts.Events&EventTraceeStop != 0 { 1061 if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil { 1062 return wr, anyWaitableTasks 1063 } 1064 } 1065 if opts.Events&EventGroupContinue != 0 { 1066 if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil { 1067 return wr, anyWaitableTasks 1068 } 1069 } 1070 } 1071 1072 return nil, anyWaitableTasks 1073 } 1074 1075 // Preconditions: The TaskSet mutex must be locked for writing. 1076 func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtracer bool) *WaitResult { 1077 if asPtracer && !target.exitTracerNotified { 1078 return nil 1079 } 1080 if !asPtracer && !target.exitParentNotified { 1081 return nil 1082 } 1083 // Zombied thread group leaders are never waitable until their thread group 1084 // is otherwise empty. Usually this is caught by the 1085 // target.exitParentNotified check above, but if t is both (in the thread 1086 // group of) target's tracer and parent, asPtracer may be true. 1087 if target == target.tg.leader && target.tg.tasksCount != 1 { 1088 return nil 1089 } 1090 pid := t.tg.pidns.tids[target] 1091 uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() 1092 status := target.exitStatus 1093 if !opts.ConsumeEvent { 1094 return &WaitResult{ 1095 Task: target, 1096 TID: pid, 1097 UID: uid, 1098 Event: EventExit, 1099 Status: status, 1100 } 1101 } 1102 // Surprisingly, the exit status reported by a non-consuming wait can 1103 // differ from that reported by a consuming wait; the latter will return 1104 // the group exit code if one is available. 
1105 if target.tg.exiting { 1106 status = target.tg.exitStatus 1107 } 1108 // t may be (in the thread group of) target's parent, tracer, or both. We 1109 // don't need to check for !exitTracerAcked because tracees are detached 1110 // here, and we don't need to check for !exitParentAcked because zombies 1111 // will be reaped here. 1112 if tracer := target.Tracer(); tracer != nil && tracer.tg == t.tg && target.exitTracerNotified { 1113 target.exitTracerAcked = true 1114 target.ptraceTracer.Store((*Task)(nil)) 1115 delete(t.ptraceTracees, target) 1116 } 1117 if target.parent != nil && target.parent.tg == t.tg && target.exitParentNotified { 1118 target.exitParentAcked = true 1119 if target == target.tg.leader { 1120 // target.tg.exitedCPUStats doesn't include target.CPUStats() yet, 1121 // and won't until after target.exitNotifyLocked() (maybe). Include 1122 // target.CPUStats() explicitly. This is consistent with Linux, 1123 // which accounts an exited task's cputime to its thread group in 1124 // kernel/exit.c:release_task() => __exit_signal(), and uses 1125 // thread_group_cputime_adjusted() in wait_task_zombie(). 1126 t.tg.childCPUStats.Accumulate(target.CPUStats()) 1127 t.tg.childCPUStats.Accumulate(target.tg.exitedCPUStats) 1128 t.tg.childCPUStats.Accumulate(target.tg.childCPUStats) 1129 // Update t's child max resident set size. The size will be the maximum 1130 // of this thread's size and all its childrens' sizes. 1131 if t.tg.childMaxRSS < target.tg.maxRSS { 1132 t.tg.childMaxRSS = target.tg.maxRSS 1133 } 1134 if t.tg.childMaxRSS < target.tg.childMaxRSS { 1135 t.tg.childMaxRSS = target.tg.childMaxRSS 1136 } 1137 } 1138 } 1139 target.exitNotifyLocked(false) 1140 return &WaitResult{ 1141 Task: target, 1142 TID: pid, 1143 UID: uid, 1144 Event: EventExit, 1145 Status: status, 1146 } 1147 } 1148 1149 // updateRSSLocked updates t.tg.maxRSS. 1150 // 1151 // Preconditions: The TaskSet mutex must be locked for writing. 
1152 func (t *Task) updateRSSLocked() { 1153 if mmMaxRSS := t.MemoryManager().MaxResidentSetSize(); t.tg.maxRSS < mmMaxRSS { 1154 t.tg.maxRSS = mmMaxRSS 1155 } 1156 } 1157 1158 // Preconditions: The TaskSet mutex must be locked for writing. 1159 func (t *Task) waitCollectChildGroupStopLocked(target *Task, opts *WaitOptions) *WaitResult { 1160 target.tg.signalHandlers.mu.Lock() 1161 defer target.tg.signalHandlers.mu.Unlock() 1162 if !target.tg.groupStopWaitable { 1163 return nil 1164 } 1165 pid := t.tg.pidns.tids[target] 1166 uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() 1167 sig := target.tg.groupStopSignal 1168 if opts.ConsumeEvent { 1169 target.tg.groupStopWaitable = false 1170 } 1171 return &WaitResult{ 1172 Task: target, 1173 TID: pid, 1174 UID: uid, 1175 Event: EventChildGroupStop, 1176 Status: linux.WaitStatusStopped(uint32(sig)), 1177 } 1178 } 1179 1180 // Preconditions: The TaskSet mutex must be locked for writing. 1181 func (t *Task) waitCollectGroupContinueLocked(target *Task, opts *WaitOptions) *WaitResult { 1182 target.tg.signalHandlers.mu.Lock() 1183 defer target.tg.signalHandlers.mu.Unlock() 1184 if !target.tg.groupContWaitable { 1185 return nil 1186 } 1187 pid := t.tg.pidns.tids[target] 1188 uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() 1189 if opts.ConsumeEvent { 1190 target.tg.groupContWaitable = false 1191 } 1192 return &WaitResult{ 1193 Task: target, 1194 TID: pid, 1195 UID: uid, 1196 Event: EventGroupContinue, 1197 Status: linux.WaitStatusContinued(), 1198 } 1199 } 1200 1201 // Preconditions: The TaskSet mutex must be locked for writing. 
1202 func (t *Task) waitCollectTraceeStopLocked(target *Task, opts *WaitOptions) *WaitResult { 1203 target.tg.signalHandlers.mu.Lock() 1204 defer target.tg.signalHandlers.mu.Unlock() 1205 if target.stop == nil { 1206 return nil 1207 } 1208 if _, ok := target.stop.(*ptraceStop); !ok { 1209 return nil 1210 } 1211 if target.ptraceCode == 0 { 1212 return nil 1213 } 1214 pid := t.tg.pidns.tids[target] 1215 uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() 1216 code := target.ptraceCode 1217 if opts.ConsumeEvent { 1218 target.ptraceCode = 0 1219 } 1220 return &WaitResult{ 1221 Task: target, 1222 TID: pid, 1223 UID: uid, 1224 Event: EventTraceeStop, 1225 Status: linux.WaitStatusStopped(uint32(code)), 1226 } 1227 } 1228 1229 // ExitState returns t's current progress through the exit path. 1230 func (t *Task) ExitState() TaskExitState { 1231 t.tg.pidns.owner.mu.RLock() 1232 defer t.tg.pidns.owner.mu.RUnlock() 1233 return t.exitState 1234 } 1235 1236 // ParentDeathSignal returns t's parent death signal. 1237 func (t *Task) ParentDeathSignal() linux.Signal { 1238 t.mu.Lock() 1239 defer t.mu.Unlock() 1240 return t.parentDeathSignal 1241 } 1242 1243 // SetParentDeathSignal sets t's parent death signal. 1244 func (t *Task) SetParentDeathSignal(sig linux.Signal) { 1245 t.mu.Lock() 1246 defer t.mu.Unlock() 1247 t.parentDeathSignal = sig 1248 }