// Scraped from: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/task_signals.go (about)

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

// This file defines the behavior of task signal handling.

import (
	"fmt"
	"sync/atomic"
	"time"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/eventchannel"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	ucspb "github.com/SagerNet/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// SignalAction is an internal signal action: the outcome chosen for a signal
// once its number and installed linux.SigAction have been considered.
type SignalAction int

// Available signal actions.
// Note that although we refer to the complete set internally,
// the application is only capable of using the Default and
// Ignore actions from the system call interface.
const (
	// SignalActionTerm terminates the thread group. It is the zero value of
	// SignalAction.
	SignalActionTerm SignalAction = iota
	// SignalActionCore is handled together with SignalActionTerm by
	// Task.deliverSignal in this file.
	SignalActionCore
	// SignalActionStop initiates a group stop.
	SignalActionStop
	// SignalActionIgnore drops the signal.
	SignalActionIgnore
	// SignalActionHandler delivers the signal to a user-configured handler.
	SignalActionHandler
)

// Default signal handler actions. Note that for most signals,
// (except SIGKILL and SIGSTOP) these can be overridden by the app.
//
// Signals that have no entry here (the map lists no realtime signals) yield
// the zero value, SignalActionTerm, when looked up.
var defaultActions = map[linux.Signal]SignalAction{
	// POSIX.1-1990 standard.
	linux.SIGHUP:  SignalActionTerm,
	linux.SIGINT:  SignalActionTerm,
	linux.SIGQUIT: SignalActionCore,
	linux.SIGILL:  SignalActionCore,
	linux.SIGABRT: SignalActionCore,
	linux.SIGFPE:  SignalActionCore,
	linux.SIGKILL: SignalActionTerm, // but see ThreadGroup.applySignalSideEffects
	linux.SIGSEGV: SignalActionCore,
	linux.SIGPIPE: SignalActionTerm,
	linux.SIGALRM: SignalActionTerm,
	linux.SIGTERM: SignalActionTerm,
	linux.SIGUSR1: SignalActionTerm,
	linux.SIGUSR2: SignalActionTerm,
	linux.SIGCHLD: SignalActionIgnore,
	linux.SIGCONT: SignalActionIgnore, // but see ThreadGroup.applySignalSideEffects
	linux.SIGSTOP: SignalActionStop,
	linux.SIGTSTP: SignalActionStop,
	linux.SIGTTIN: SignalActionStop,
	linux.SIGTTOU: SignalActionStop,
	// POSIX.1-2001 standard.
	linux.SIGBUS:    SignalActionCore,
	linux.SIGPROF:   SignalActionTerm,
	linux.SIGSYS:    SignalActionCore,
	linux.SIGTRAP:   SignalActionCore,
	linux.SIGURG:    SignalActionIgnore,
	linux.SIGVTALRM: SignalActionTerm,
	linux.SIGXCPU:   SignalActionCore,
	linux.SIGXFSZ:   SignalActionCore,
	// The rest on linux.
	linux.SIGSTKFLT: SignalActionTerm,
	linux.SIGIO:     SignalActionTerm,
	linux.SIGPWR:    SignalActionTerm,
	linux.SIGWINCH:  SignalActionIgnore,
}

// computeAction figures out what to do given a signal number
// and a linux.SigAction. SIGSTOP always results in a SignalActionStop,
// and SIGKILL always results in a SignalActionTerm.
// Signal 0 is always ignored as many programs use it for various internal functions
// and don't expect it to do anything.
//
// In the event the signal is not one of these, act.Handler determines what
// happens next.
// If act.Handler is:
// 0, the default action is taken;
// 1, the signal is ignored;
// anything else, the function returns SignalActionHandler.
101 func computeAction(sig linux.Signal, act linux.SigAction) SignalAction { 102 switch sig { 103 case linux.SIGSTOP: 104 return SignalActionStop 105 case linux.SIGKILL: 106 return SignalActionTerm 107 case linux.Signal(0): 108 return SignalActionIgnore 109 } 110 111 switch act.Handler { 112 case linux.SIG_DFL: 113 return defaultActions[sig] 114 case linux.SIG_IGN: 115 return SignalActionIgnore 116 default: 117 return SignalActionHandler 118 } 119 } 120 121 // UnblockableSignals contains the set of signals which cannot be blocked. 122 var UnblockableSignals = linux.MakeSignalSet(linux.SIGKILL, linux.SIGSTOP) 123 124 // StopSignals is the set of signals whose default action is SignalActionStop. 125 var StopSignals = linux.MakeSignalSet(linux.SIGSTOP, linux.SIGTSTP, linux.SIGTTIN, linux.SIGTTOU) 126 127 // dequeueSignalLocked returns a pending signal that is *not* included in mask. 128 // If there are no pending unmasked signals, dequeueSignalLocked returns nil. 129 // 130 // Preconditions: t.tg.signalHandlers.mu must be locked. 131 func (t *Task) dequeueSignalLocked(mask linux.SignalSet) *linux.SignalInfo { 132 if info := t.pendingSignals.dequeue(mask); info != nil { 133 return info 134 } 135 return t.tg.pendingSignals.dequeue(mask) 136 } 137 138 // discardSpecificLocked removes all instances of the given signal from all 139 // signal queues in tg. 140 // 141 // Preconditions: The signal mutex must be locked. 142 func (tg *ThreadGroup) discardSpecificLocked(sig linux.Signal) { 143 tg.pendingSignals.discardSpecific(sig) 144 for t := tg.tasks.Front(); t != nil; t = t.Next() { 145 t.pendingSignals.discardSpecific(sig) 146 } 147 } 148 149 // PendingSignals returns the set of pending signals. 
150 func (t *Task) PendingSignals() linux.SignalSet { 151 t.tg.pidns.owner.mu.RLock() 152 defer t.tg.pidns.owner.mu.RUnlock() 153 t.tg.signalHandlers.mu.Lock() 154 defer t.tg.signalHandlers.mu.Unlock() 155 return t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet 156 } 157 158 // deliverSignal delivers the given signal and returns the following run state. 159 func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState { 160 sigact := computeAction(linux.Signal(info.Signo), act) 161 162 if t.haveSyscallReturn { 163 if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok { 164 // Signals that are ignored, cause a thread group stop, or 165 // terminate the thread group do not interact with interrupted 166 // syscalls; in Linux terms, they are never returned to the signal 167 // handling path from get_signal => get_signal_to_deliver. The 168 // behavior of an interrupted syscall is determined by the first 169 // signal that is actually handled (by userspace). 170 if sigact == SignalActionHandler { 171 switch { 172 case sre == syserror.ERESTARTNOHAND: 173 fallthrough 174 case sre == syserror.ERESTART_RESTARTBLOCK: 175 fallthrough 176 case (sre == syserror.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0): 177 t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo) 178 t.Arch().SetReturn(uintptr(-ExtractErrno(syserror.EINTR, -1))) 179 default: 180 t.Debugf("Restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo) 181 t.Arch().RestartSyscall() 182 } 183 } 184 } 185 } 186 187 switch sigact { 188 case SignalActionTerm, SignalActionCore: 189 // "Default action is to terminate the process." - signal(7) 190 t.Debugf("Signal %d: terminating thread group", info.Signo) 191 192 // Emit an event channel messages related to this uncaught signal. 
193 ucs := &ucspb.UncaughtSignal{ 194 Tid: int32(t.Kernel().TaskSet().Root.IDOfTask(t)), 195 Pid: int32(t.Kernel().TaskSet().Root.IDOfThreadGroup(t.ThreadGroup())), 196 Registers: t.Arch().StateData().Proto(), 197 SignalNumber: info.Signo, 198 } 199 200 // Attach an fault address if appropriate. 201 switch linux.Signal(info.Signo) { 202 case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS: 203 ucs.FaultAddr = info.Addr() 204 } 205 206 eventchannel.Emit(ucs) 207 208 t.PrepareGroupExit(ExitStatus{Signo: int(info.Signo)}) 209 return (*runExit)(nil) 210 211 case SignalActionStop: 212 // "Default action is to stop the process." 213 t.initiateGroupStop(info) 214 215 case SignalActionIgnore: 216 // "Default action is to ignore the signal." 217 t.Debugf("Signal %d: ignored", info.Signo) 218 219 case SignalActionHandler: 220 // Try to deliver the signal to the user-configured handler. 221 t.Debugf("Signal %d: delivering to handler", info.Signo) 222 if err := t.deliverSignalToHandler(info, act); err != nil { 223 // This is not a warning, it can occur during normal operation. 224 t.Debugf("Failed to deliver signal %+v to user handler: %v", info, err) 225 226 // Send a forced SIGSEGV. If the signal that couldn't be delivered 227 // was a SIGSEGV, force the handler to SIG_DFL. 228 t.forceSignal(linux.SIGSEGV, linux.Signal(info.Signo) == linux.SIGSEGV /* unconditional */) 229 t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) 230 } 231 232 default: 233 panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(linux.Signal(info.Signo), act))) 234 } 235 return (*runInterrupt)(nil) 236 } 237 238 // deliverSignalToHandler changes the task's userspace state to enter the given 239 // user-configured handler for the given signal. 240 func (t *Task) deliverSignalToHandler(info *linux.SignalInfo, act linux.SigAction) error { 241 // Signal delivery to an application handler interrupts restartable 242 // sequences. 
243 t.rseqInterrupt() 244 245 // Are executing on the main stack, 246 // or the provided alternate stack? 247 sp := hostarch.Addr(t.Arch().Stack()) 248 249 // N.B. This is a *copy* of the alternate stack that the user's signal 250 // handler expects to see in its ucontext (even if it's not in use). 251 alt := t.signalStack 252 if act.Flags&linux.SA_ONSTACK != 0 && alt.IsEnabled() { 253 alt.Flags |= linux.SS_ONSTACK 254 if !alt.Contains(sp) { 255 sp = hostarch.Addr(alt.Top()) 256 } 257 } 258 259 mm := t.MemoryManager() 260 // Set up the signal handler. If we have a saved signal mask, the signal 261 // handler should run with the current mask, but sigreturn should restore 262 // the saved one. 263 st := &arch.Stack{ 264 Arch: t.Arch(), 265 IO: mm, 266 Bottom: sp, 267 } 268 mask := t.signalMask 269 if t.haveSavedSignalMask { 270 mask = t.savedSignalMask 271 } 272 273 // Set up the restorer. 274 // x86-64 should always uses SA_RESTORER, but this flag is optional on other platforms. 275 // Please see the linux code as reference: 276 // linux/arch/x86/kernel/signal.c:__setup_rt_frame() 277 // If SA_RESTORER is not configured, we can use the sigreturn trampolines 278 // the vdso provides instead. 279 // Please see the linux code as reference: 280 // linux/arch/arm64/kernel/signal.c:setup_return() 281 if act.Flags&linux.SA_RESTORER == 0 { 282 act.Restorer = mm.VDSOSigReturn() 283 } 284 285 if err := t.Arch().SignalSetup(st, &act, info, &alt, mask); err != nil { 286 return err 287 } 288 t.p.FullStateChanged() 289 t.haveSavedSignalMask = false 290 291 // Add our signal mask. 292 newMask := t.signalMask | act.Mask 293 if act.Flags&linux.SA_NODEFER == 0 { 294 newMask |= linux.SignalSetOf(linux.Signal(info.Signo)) 295 } 296 t.SetSignalMask(newMask) 297 298 return nil 299 } 300 301 var ctrlResume = &SyscallControl{ignoreReturn: true} 302 303 // SignalReturn implements sigreturn(2) (if rt is false) or rt_sigreturn(2) (if 304 // rt is true). 
305 func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) { 306 st := t.Stack() 307 sigset, alt, err := t.Arch().SignalRestore(st, rt) 308 if err != nil { 309 return nil, err 310 } 311 312 // Attempt to record the given signal stack. Note that we silently 313 // ignore failures here, as does Linux. Only an EFAULT may be 314 // generated, but SignalRestore has already deserialized the entire 315 // frame successfully. 316 t.SetSignalStack(alt) 317 318 // Restore our signal mask. SIGKILL and SIGSTOP should not be blocked. 319 t.SetSignalMask(sigset &^ UnblockableSignals) 320 t.p.FullStateChanged() 321 322 return ctrlResume, nil 323 } 324 325 // Sigtimedwait implements the semantics of sigtimedwait(2). 326 // 327 // Preconditions: 328 // * The caller must be running on the task goroutine. 329 // * t.exitState < TaskExitZombie. 330 func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.SignalInfo, error) { 331 // set is the set of signals we're interested in; invert it to get the set 332 // of signals to block. 333 mask := ^(set &^ UnblockableSignals) 334 335 t.tg.signalHandlers.mu.Lock() 336 defer t.tg.signalHandlers.mu.Unlock() 337 if info := t.dequeueSignalLocked(mask); info != nil { 338 return info, nil 339 } 340 341 if timeout == 0 { 342 return nil, linuxerr.EAGAIN 343 } 344 345 // Unblock signals we're waiting for. Remember the original signal mask so 346 // that Task.sendSignalTimerLocked doesn't discard ignored signals that 347 // we're temporarily unblocking. 348 t.realSignalMask = t.signalMask 349 t.setSignalMaskLocked(t.signalMask & mask) 350 351 // Wait for a timeout or new signal. 352 t.tg.signalHandlers.mu.Unlock() 353 _, err := t.BlockWithTimeout(nil, true, timeout) 354 t.tg.signalHandlers.mu.Lock() 355 356 // Restore the original signal mask. 
357 t.setSignalMaskLocked(t.realSignalMask) 358 t.realSignalMask = 0 359 360 if info := t.dequeueSignalLocked(mask); info != nil { 361 return info, nil 362 } 363 if err == linuxerr.ETIMEDOUT { 364 return nil, linuxerr.EAGAIN 365 } 366 return nil, err 367 } 368 369 // SendSignal sends the given signal to t. 370 // 371 // The following errors may be returned: 372 // 373 // syserror.ESRCH - The task has exited. 374 // linuxerr.EINVAL - The signal is not valid. 375 // linuxerr.EAGAIN - THe signal is realtime, and cannot be queued. 376 // 377 func (t *Task) SendSignal(info *linux.SignalInfo) error { 378 t.tg.pidns.owner.mu.RLock() 379 defer t.tg.pidns.owner.mu.RUnlock() 380 t.tg.signalHandlers.mu.Lock() 381 defer t.tg.signalHandlers.mu.Unlock() 382 return t.sendSignalLocked(info, false /* group */) 383 } 384 385 // SendGroupSignal sends the given signal to t's thread group. 386 func (t *Task) SendGroupSignal(info *linux.SignalInfo) error { 387 t.tg.pidns.owner.mu.RLock() 388 defer t.tg.pidns.owner.mu.RUnlock() 389 t.tg.signalHandlers.mu.Lock() 390 defer t.tg.signalHandlers.mu.Unlock() 391 return t.sendSignalLocked(info, true /* group */) 392 } 393 394 // SendSignal sends the given signal to tg, using tg's leader to determine if 395 // the signal is blocked. 
func (tg *ThreadGroup) SendSignal(info *linux.SignalInfo) error {
	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()
	return tg.leader.sendSignalLocked(info, true /* group */)
}

// sendSignalLocked is sendSignalTimerLocked with no associated interval
// timer.
//
// Preconditions: as for sendSignalTimerLocked.
func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error {
	return t.sendSignalTimerLocked(info, group, nil)
}

// sendSignalTimerLocked queues info on t's private signal queue, or on the
// thread group's shared queue if group is true, and interrupts a task that can
// receive it. If timer is non-nil and the signal is dropped (because it is
// ignored or is a duplicate), timer.signalRejectedLocked is called.
//
// It returns syserror.ESRCH if t is dead, linuxerr.EINVAL if the signal number
// is invalid, and linuxerr.EAGAIN if a realtime signal could not be enqueued.
// A signal that is discarded is not an error.
//
// Preconditions: The signal mutex must be locked. (The callers visible in this
// file also hold the TaskSet mutex.)
func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error {
	if t.exitState == TaskExitDead {
		return syserror.ESRCH
	}
	sig := linux.Signal(info.Signo)
	// Signal 0 delivers nothing; only the liveness check above applies to it.
	if sig == 0 {
		return nil
	}
	if !sig.IsValid() {
		return linuxerr.EINVAL
	}

	// Signal side effects apply even if the signal is ultimately discarded.
	t.tg.applySignalSideEffectsLocked(sig)

	// TODO: "Only signals for which the "init" process has established a
	// signal handler can be sent to the "init" process by other members of the
	// PID namespace. This restriction applies even to privileged processes,
	// and prevents other members of the PID namespace from accidentally
	// killing the "init" process." - pid_namespaces(7). We don't currently do
	// this for child namespaces, though we should; we also don't do this for
	// the root namespace (the same restriction applies to global init on
	// Linux), where whether or not we should is much murkier. In practice,
	// most sandboxed applications are not prepared to function as an init
	// process.

	// Unmasked, ignored signals are discarded without being queued, unless
	// they will be visible to a tracer. Even for group signals, it's the
	// originally-targeted task's signal mask and tracer that matter; compare
	// Linux's kernel/signal.c:__send_signal() => prepare_signal() =>
	// sig_ignored().
	//
	// realSignalMask is also consulted so that a signal which is only
	// temporarily unblocked by Task.Sigtimedwait is not discarded here.
	ignored := computeAction(sig, t.tg.signalHandlers.actions[sig]) == SignalActionIgnore
	if sigset := linux.SignalSetOf(sig); sigset&t.signalMask == 0 && sigset&t.realSignalMask == 0 && ignored && !t.hasTracer() {
		t.Debugf("Discarding ignored signal %d", sig)
		if timer != nil {
			timer.signalRejectedLocked()
		}
		return nil
	}

	// Pick the destination queue: task-private by default, shared for group
	// signals.
	q := &t.pendingSignals
	if group {
		q = &t.tg.pendingSignals
	}
	if !q.enqueue(info, timer) {
		// A realtime signal that cannot be queued is reported to the sender;
		// any other signal that cannot be queued is treated as a duplicate
		// and dropped silently.
		if sig.IsRealtime() {
			return linuxerr.EAGAIN
		}
		t.Debugf("Discarding duplicate signal %d", sig)
		if timer != nil {
			timer.signalRejectedLocked()
		}
		return nil
	}

	// Find a receiver to notify. Note that the task we choose to notify, if
	// any, may not be the task that actually dequeues and handles the signal;
	// e.g. a racing signal mask change may cause the notified task to become
	// ineligible, or a racing sibling task may dequeue the signal first.
	if t.canReceiveSignalLocked(sig) {
		t.Debugf("Notified of signal %d", sig)
		t.interrupt()
		return nil
	}
	if group {
		if nt := t.tg.findSignalReceiverLocked(sig); nt != nil {
			nt.Debugf("Notified of group signal %d", sig)
			nt.interrupt()
			return nil
		}
	}
	t.Debugf("No task notified of signal %d", sig)
	return nil
}

// applySignalSideEffectsLocked applies the effects that sending sig has on tg
// at send time, independent of whether the signal is later queued or
// delivered: stop signals discard pending SIGCONT, SIGCONT ends any group
// stop, and SIGKILL marks tg as exiting and kills every task in it. Other
// signals have no side effects here.
//
// Preconditions: The signal mutex must be locked.
func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) {
	switch {
	case linux.SignalSetOf(sig)&StopSignals != 0:
		// Stop signals cause all prior SIGCONT to be discarded. (This is
		// despite the fact this has little effect since SIGCONT's most
		// important effect is applied when the signal is sent in the branch
		// below, not when the signal is delivered.)
		tg.discardSpecificLocked(linux.SIGCONT)
	case sig == linux.SIGCONT:
		// "The SIGCONT signal has a side effect of waking up (all threads of)
		// a group-stopped process. This side effect happens before
		// signal-delivery-stop. The tracer can't suppress this side effect (it
		// can only suppress signal injection, which only causes the SIGCONT
		// handler to not be executed in the tracee, if such a handler is
		// installed." - ptrace(2)
		tg.endGroupStopLocked(true)
	case sig == linux.SIGKILL:
		// "SIGKILL does not generate signal-delivery-stop and therefore the
		// tracer can't suppress it. SIGKILL kills even within system calls
		// (syscall-exit-stop is not generated prior to death by SIGKILL)." -
		// ptrace(2)
		//
		// Note that this differs from ThreadGroup.requestExit in that it
		// ignores tg.execing.
		if !tg.exiting {
			// Record the exit status only for the first exit request.
			tg.exiting = true
			tg.exitStatus = ExitStatus{Signo: int(linux.SIGKILL)}
		}
		for t := tg.tasks.Front(); t != nil; t = t.Next() {
			t.killLocked()
		}
	}
}

// canReceiveSignalLocked returns true if t should be interrupted to receive
// the given signal. canReceiveSignalLocked is analogous to Linux's
// kernel/signal.c:wants_signal(), but see below for divergences.
//
// Note that this is not a pure predicate: every call notifies t.signalQueue
// about sig before any eligibility check is made.
//
// Preconditions: The signal mutex must be locked.
func (t *Task) canReceiveSignalLocked(sig linux.Signal) bool {
	// Notify that the signal is queued.
	t.signalQueue.Notify(waiter.EventMask(linux.MakeSignalSet(sig)))

	// - Do not choose tasks that are blocking the signal.
	if linux.SignalSetOf(sig)&t.signalMask != 0 {
		return false
	}
	// - No need to check Task.exitState, as the exit path sets every bit in the
	// signal mask when it transitions from TaskExitNone to TaskExitInitiated.
	// - No special case for SIGKILL: SIGKILL already interrupted all tasks in the
	// task group via applySignalSideEffects => killLocked.
	// - Do not choose stopped tasks, which cannot handle signals.
	if t.stop != nil {
		return false
	}
	// - Do not choose tasks that have already been interrupted, as they may be
	// busy handling another signal.
	if len(t.interruptChan) != 0 {
		return false
	}
	return true
}

// findSignalReceiverLocked returns a task in tg that should be interrupted to
// receive the given signal. If no such task exists, findSignalReceiverLocked
// returns nil.
//
// Tasks are tried in list order and the first eligible one is returned.
// Linux actually records curr_target to balance the group signal targets.
//
// Preconditions: The signal mutex must be locked.
func (tg *ThreadGroup) findSignalReceiverLocked(sig linux.Signal) *Task {
	for t := tg.tasks.Front(); t != nil; t = t.Next() {
		if t.canReceiveSignalLocked(sig) {
			return t
		}
	}
	return nil
}

// forceSignal ensures that the task is not ignoring or blocking the given
// signal. If unconditional is true, forceSignal takes action even if the
// signal isn't being ignored or blocked.
//
// forceSignal takes the TaskSet mutex (for reading) and the signal mutex
// itself.
func (t *Task) forceSignal(sig linux.Signal, unconditional bool) {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.forceSignalLocked(sig, unconditional)
}

// forceSignalLocked implements forceSignal. If sig is blocked by t, ignored
// (handler is SIG_IGN), or unconditional is true, the thread group's handler
// for sig is reset to SIG_DFL; additionally, if sig was blocked, it is removed
// from t's signal mask.
//
// Preconditions: The signal mutex must be locked.
func (t *Task) forceSignalLocked(sig linux.Signal, unconditional bool) {
	blocked := linux.SignalSetOf(sig)&t.signalMask != 0
	act := t.tg.signalHandlers.actions[sig]
	ignored := act.Handler == linux.SIG_IGN
	if blocked || ignored || unconditional {
		// Only the handler is reset; the other fields of act are kept.
		act.Handler = linux.SIG_DFL
		t.tg.signalHandlers.actions[sig] = act
		if blocked {
			t.setSignalMaskLocked(t.signalMask &^ linux.SignalSetOf(sig))
		}
	}
}

// SignalMask returns a copy of t's signal mask. The mask is read with an
// atomic load, so no mutex is taken here.
func (t *Task) SignalMask() linux.SignalSet {
	return linux.SignalSet(atomic.LoadUint64((*uint64)(&t.signalMask)))
}

// SetSignalMask sets t's signal mask.
//
// Preconditions:
// * The caller must be running on the task goroutine.
// * t.exitState < TaskExitZombie.
596 func (t *Task) SetSignalMask(mask linux.SignalSet) { 597 // By precondition, t prevents t.tg from completing an execve and mutating 598 // t.tg.signalHandlers, so we can skip the TaskSet mutex. 599 t.tg.signalHandlers.mu.Lock() 600 t.setSignalMaskLocked(mask) 601 t.tg.signalHandlers.mu.Unlock() 602 } 603 604 // Preconditions: The signal mutex must be locked. 605 func (t *Task) setSignalMaskLocked(mask linux.SignalSet) { 606 oldMask := t.signalMask 607 atomic.StoreUint64((*uint64)(&t.signalMask), uint64(mask)) 608 609 // If the new mask blocks any signals that were not blocked by the old 610 // mask, and at least one such signal is pending in tg.pendingSignals, and 611 // t has been woken, it could be the case that t was woken to handle that 612 // signal, but will no longer do so as a result of its new signal mask, so 613 // we have to pick a replacement. 614 blocked := mask &^ oldMask 615 blockedGroupPending := blocked & t.tg.pendingSignals.pendingSet 616 if blockedGroupPending != 0 && t.interrupted() { 617 linux.ForEachSignal(blockedGroupPending, func(sig linux.Signal) { 618 if nt := t.tg.findSignalReceiverLocked(sig); nt != nil { 619 nt.interrupt() 620 return 621 } 622 }) 623 } 624 625 // Conversely, if the new mask unblocks any signals that were blocked by 626 // the old mask, and at least one such signal is pending, we may now need 627 // to handle that signal. 628 unblocked := oldMask &^ mask 629 unblockedPending := unblocked & (t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet) 630 if unblockedPending != 0 { 631 t.interruptSelf() 632 } 633 } 634 635 // SetSavedSignalMask sets the saved signal mask (see Task.savedSignalMask's 636 // comment). 637 // 638 // Preconditions: The caller must be running on the task goroutine. 639 func (t *Task) SetSavedSignalMask(mask linux.SignalSet) { 640 t.savedSignalMask = mask 641 t.haveSavedSignalMask = true 642 } 643 644 // SignalStack returns the task-private signal stack. 
645 func (t *Task) SignalStack() linux.SignalStack { 646 t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch()) 647 alt := t.signalStack 648 if t.onSignalStack(alt) { 649 alt.Flags |= linux.SS_ONSTACK 650 } 651 return alt 652 } 653 654 // onSignalStack returns true if the task is executing on the given signal stack. 655 func (t *Task) onSignalStack(alt linux.SignalStack) bool { 656 sp := hostarch.Addr(t.Arch().Stack()) 657 return alt.Contains(sp) 658 } 659 660 // SetSignalStack sets the task-private signal stack. 661 // 662 // This value may not be changed if the task is currently executing on the 663 // signal stack, i.e. if t.onSignalStack returns true. In this case, this 664 // function will return false. Otherwise, true is returned. 665 func (t *Task) SetSignalStack(alt linux.SignalStack) bool { 666 // Check that we're not executing on the stack. 667 if t.onSignalStack(t.signalStack) { 668 return false 669 } 670 671 if alt.Flags&linux.SS_DISABLE != 0 { 672 // Don't record anything beyond the flags. 673 t.signalStack = linux.SignalStack{ 674 Flags: linux.SS_DISABLE, 675 } 676 } else { 677 // Mask out irrelevant parts: only disable matters. 678 alt.Flags &= linux.SS_DISABLE 679 t.signalStack = alt 680 } 681 return true 682 } 683 684 // SetSigAction atomically sets the thread group's signal action for signal sig 685 // to *actptr (if actptr is not nil) and returns the old signal action. 
686 func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) { 687 if !sig.IsValid() { 688 return linux.SigAction{}, linuxerr.EINVAL 689 } 690 691 tg.pidns.owner.mu.RLock() 692 defer tg.pidns.owner.mu.RUnlock() 693 sh := tg.signalHandlers 694 sh.mu.Lock() 695 defer sh.mu.Unlock() 696 oldact := sh.actions[sig] 697 if actptr != nil { 698 if sig == linux.SIGKILL || sig == linux.SIGSTOP { 699 return oldact, linuxerr.EINVAL 700 } 701 702 act := *actptr 703 act.Mask &^= UnblockableSignals 704 sh.actions[sig] = act 705 // From POSIX, by way of Linux: 706 // 707 // "Setting a signal action to SIG_IGN for a signal that is pending 708 // shall cause the pending signal to be discarded, whether or not it is 709 // blocked." 710 // 711 // "Setting a signal action to SIG_DFL for a signal that is pending and 712 // whose default action is to ignore the signal (for example, SIGCHLD), 713 // shall cause the pending signal to be discarded, whether or not it is 714 // blocked." 715 if computeAction(sig, act) == SignalActionIgnore { 716 tg.discardSpecificLocked(sig) 717 } 718 } 719 return oldact, nil 720 } 721 722 // groupStop is a TaskStop placed on tasks that have received a stop signal 723 // (SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU). (The term "group-stop" originates from 724 // the ptrace man page.) 725 // 726 // +stateify savable 727 type groupStop struct{} 728 729 // Killable implements TaskStop.Killable. 730 func (*groupStop) Killable() bool { return true } 731 732 // initiateGroupStop attempts to initiate a group stop based on a 733 // previously-dequeued stop signal. 734 // 735 // Preconditions: The caller must be running on the task goroutine. 
func (t *Task) initiateGroupStop(info *linux.SignalInfo) {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// Each check below detects an event that raced with this stop signal
	// between dequeue and now; in every such case the stop is abandoned.
	if t.groupStopPending {
		t.Debugf("Signal %d: not stopping thread group: lost to racing stop signal", info.Signo)
		return
	}
	if !t.tg.groupStopDequeued {
		// endGroupStopLocked clears groupStopDequeued to cancel stops.
		t.Debugf("Signal %d: not stopping thread group: lost to racing SIGCONT", info.Signo)
		return
	}
	if t.tg.exiting {
		t.Debugf("Signal %d: not stopping thread group: lost to racing group exit", info.Signo)
		return
	}
	if t.tg.execing != nil {
		t.Debugf("Signal %d: not stopping thread group: lost to racing execve", info.Signo)
		return
	}
	// The recorded stop signal is left alone if a group stop has already
	// completed.
	if !t.tg.groupStopComplete {
		t.tg.groupStopSignal = linux.Signal(info.Signo)
	}
	// Recount the tasks that still have to participate in the stop.
	t.tg.groupStopPendingCount = 0
	for t2 := t.tg.tasks.Front(); t2 != nil; t2 = t2.Next() {
		// Killed or exiting tasks do not participate.
		if t2.killedLocked() || t2.exitState >= TaskExitInitiated {
			t2.groupStopPending = false
			continue
		}
		t2.groupStopPending = true
		t2.groupStopAcknowledged = false
		if t2.ptraceSeized {
			t2.trapNotifyPending = true
			// A seized tracee in a listening ptrace stop is released from
			// that stop.
			if s, ok := t2.stop.(*ptraceStop); ok && s.listen {
				t2.endInternalStopLocked()
			}
		}
		t2.interrupt()
		t.tg.groupStopPendingCount++
	}
	t.Debugf("Signal %d: stopping %d threads in thread group", info.Signo, t.tg.groupStopPendingCount)
}

// endGroupStopLocked ensures that all prior stop signals received by tg are
// not stopping tg and will not stop tg in the future. If broadcast is true,
// parent and tracer notification will be scheduled if appropriate.
//
// Preconditions: The signal mutex must be locked.
func (tg *ThreadGroup) endGroupStopLocked(broadcast bool) {
	// Discard all previously-queued stop signals.
	linux.ForEachSignal(StopSignals, tg.discardSpecificLocked)

	// Nothing more to do if no group stop is pending or complete.
	if tg.groupStopPendingCount == 0 && !tg.groupStopComplete {
		return
	}

	completeStr := "incomplete"
	if tg.groupStopComplete {
		completeStr = "complete"
	}
	tg.leader.Debugf("Ending %s group stop with %d threads pending", completeStr, tg.groupStopPendingCount)
	for t := tg.tasks.Front(); t != nil; t = t.Next() {
		t.groupStopPending = false
		if t.ptraceSeized {
			// Seized tracees get a trap notification; only a listening
			// ptrace stop is ended here.
			t.trapNotifyPending = true
			if s, ok := t.stop.(*ptraceStop); ok && s.listen {
				t.endInternalStopLocked()
			}
		} else {
			// Other tasks are released only if they are in a group stop.
			if _, ok := t.stop.(*groupStop); ok {
				t.endInternalStopLocked()
			}
		}
	}
	if broadcast {
		// Instead of notifying the parent here, set groupContNotify so that
		// one of the continuing tasks does so. (Linux does something similar.)
		// The reason we do this is to keep locking sane. In order to send a
		// signal to the parent, we need to lock its signal mutex, but we're
		// already holding tg's signal mutex, and the TaskSet mutex must be
		// locked for writing for us to hold two signal mutexes. Since we don't
		// want to require this for endGroupStopLocked (which is called from
		// signal-sending paths), nor do we want to lose atomicity by releasing
		// the mutexes we're already holding, just let the continuing thread
		// group deal with it.
		tg.groupContNotify = true
		// Must be computed before groupStopComplete is reset below.
		tg.groupContInterrupted = !tg.groupStopComplete
		tg.groupContWaitable = true
	}
	// Unsetting groupStopDequeued will cause racing calls to initiateGroupStop
	// to recognize that the group stop has been cancelled.
	tg.groupStopDequeued = false
	tg.groupStopSignal = 0
	tg.groupStopPendingCount = 0
	tg.groupStopComplete = false
	tg.groupStopWaitable = false
}

// participateGroupStopLocked is called to handle thread group side effects
// after t unsets t.groupStopPending. The caller must handle task side effects
// (e.g. placing the task goroutine into the group stop). It returns true if
// the caller must notify t.tg.leader's parent of a completed group stop (which
// participateGroupStopLocked cannot do due to holding the wrong locks).
//
// Preconditions: The signal mutex must be locked.
func (t *Task) participateGroupStopLocked() bool {
	// Each task is counted at most once per group stop.
	if t.groupStopAcknowledged {
		return false
	}
	t.groupStopAcknowledged = true
	t.tg.groupStopPendingCount--
	// Other tasks still have to acknowledge the stop.
	if t.tg.groupStopPendingCount != 0 {
		return false
	}
	// The stop was already marked complete; do not report it again.
	if t.tg.groupStopComplete {
		return false
	}
	t.Debugf("Completing group stop")
	t.tg.groupStopComplete = true
	t.tg.groupStopWaitable = true
	// A completed stop supersedes any continue notification not yet sent.
	t.tg.groupContNotify = false
	t.tg.groupContWaitable = false
	return true
}

// signalStop sends a signal to t's thread group of a new group stop, group
// continue, or ptrace stop, if appropriate. code and status are set in the
// signal sent to tg, if any.
//
// The signal sent is a group-directed SIGCHLD describing target: its PID is
// target's ID in t's PID namespace and its UID is target's real UID as seen
// from t's user namespace. Nothing is sent if t's thread group ignores
// SIGCHLD or has SA_NOCLDSTOP set for it.
//
// signalStop takes t's signal mutex itself.
//
// Preconditions: The TaskSet mutex must be locked (for reading or writing).
func (t *Task) signalStop(target *Task, code int32, status int32) {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	act, ok := t.tg.signalHandlers.actions[linux.SIGCHLD]
	// With no action recorded for SIGCHLD, the notification is always sent.
	if !ok || (act.Handler != linux.SIG_IGN && act.Flags&linux.SA_NOCLDSTOP == 0) {
		sigchld := &linux.SignalInfo{
			Signo: int32(linux.SIGCHLD),
			Code:  code,
		}
		sigchld.SetPID(int32(t.tg.pidns.tids[target]))
		sigchld.SetUID(int32(target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
		sigchld.SetStatus(status)
		// TODO(b/72102453): Set utime, stime.
		// The result of sending is deliberately not checked here.
		t.sendSignalLocked(sigchld, true /* group */)
	}
}

// The runInterrupt state handles conditions indicated by interrupts.
//
// +stateify savable
type runInterrupt struct{}

// execute performs one pass of interrupt handling for t and returns the next
// run state. The signal mutex is taken on entry and released on every return
// path. Conditions are checked in this order, and only the first one that
// applies is handled in a given pass:
//
//  1. t's thread group has a pending group-continue notification
//     (groupContNotify): the leader's tracer and/or parent are signaled and
//     notified, and runInterrupt is re-entered.
//  2. t has a pending group stop or ptrace trap (groupStopPending,
//     trapStopPending, trapNotifyPending): t enters a ptrace stop if it has a
//     tracer, or a group stop otherwise, and runInterrupt is re-entered.
//  3. A signal not blocked by t.signalMask can be dequeued: it is either
//     handed to runInterruptAfterSignalDeliveryStop (when
//     t.ptraceSignalLocked returns true) or delivered via t.deliverSignal.
//
// If none applies, the interrupted flag is cleared with t.unsetInterrupted
// and the task returns to runApp.
func (*runInterrupt) execute(t *Task) taskRunState {
	// Interrupts are de-duplicated (t.unsetInterrupted() will undo the effect
	// of all previous calls to t.interrupted() regardless of how many such
	// calls there have been), so early exits from this function must re-enter
	// the runInterrupt state to check for more interrupt-signaled conditions.

	t.tg.signalHandlers.mu.Lock()

	// Did we just leave a group stop?
	if t.tg.groupContNotify {
		t.tg.groupContNotify = false
		// Read these while the signal mutex is still held; they are used
		// below after it has been released.
		sig := t.tg.groupStopSignal
		intr := t.tg.groupContInterrupted
		t.tg.signalHandlers.mu.Unlock()
		t.tg.pidns.owner.mu.RLock()
		// For consistency with Linux, if the parent and (thread group
		// leader's) tracer are in the same thread group, deduplicate
		// notifications.
		notifyParent := t.tg.leader.parent != nil
		if tracer := t.tg.leader.Tracer(); tracer != nil {
			if notifyParent && tracer.tg == t.tg.leader.parent.tg {
				notifyParent = false
			}
			// Sending CLD_STOPPED to the tracer doesn't really make any sense;
			// the thread group leader may have already entered the stop and
			// notified its tracer accordingly. But it's consistent with
			// Linux...
			if intr {
				tracer.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
				// When the parent is not notified separately, the tracer's
				// event queue also receives EventChildGroupStop.
				if !notifyParent {
					tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop | EventChildGroupStop)
				} else {
					tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop)
				}
			} else {
				tracer.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig))
				tracer.tg.eventQueue.Notify(EventGroupContinue)
			}
		}
		if notifyParent {
			// If groupContInterrupted, do as Linux does and pretend the group
			// stop completed just before it ended. The theoretical behavior in
			// this case would be to send a SIGCHLD indicating the completed
			// stop, followed by a SIGCHLD indicating the continue. However,
			// SIGCHLD is a standard signal, so the latter would always be
			// dropped. Hence sending only the former is equivalent.
			if intr {
				t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
				t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue | EventChildGroupStop)
			} else {
				t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig))
				t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue)
			}
		}
		t.tg.pidns.owner.mu.RUnlock()
		return (*runInterrupt)(nil)
	}

	// Do we need to enter a group stop or related ptrace stop? This path is
	// analogous to Linux's kernel/signal.c:get_signal() => do_signal_stop()
	// (with ptrace enabled) and do_jobctl_trap().
	if t.groupStopPending || t.trapStopPending || t.trapNotifyPending {
		sig := t.tg.groupStopSignal
		notifyParent := false
		if t.groupStopPending {
			t.groupStopPending = false
			// We care about t.tg.groupStopSignal (for tracer notification)
			// even if this doesn't complete a group stop, so keep the
			// value of sig we've already read.
			notifyParent = t.participateGroupStopLocked()
		}
		t.trapStopPending = false
		t.trapNotifyPending = false
		// Drop the signal mutex so we can take the TaskSet mutex.
		t.tg.signalHandlers.mu.Unlock()

		t.tg.pidns.owner.mu.RLock()
		// There is nobody to notify if the thread group leader has no parent.
		if t.tg.leader.parent == nil {
			notifyParent = false
		}
		if tracer := t.Tracer(); tracer != nil {
			if t.ptraceSeized {
				if sig == 0 {
					sig = linux.SIGTRAP
				}
				// "If tracee was attached using PTRACE_SEIZE, group-stop is
				// indicated by PTRACE_EVENT_STOP: status>>16 ==
				// PTRACE_EVENT_STOP. This allows detection of group-stops
				// without requiring an extra PTRACE_GETSIGINFO call." -
				// "Group-stop", ptrace(2)
				t.ptraceCode = int32(sig) | linux.PTRACE_EVENT_STOP<<8
				t.ptraceSiginfo = &linux.SignalInfo{
					Signo: int32(sig),
					Code:  t.ptraceCode,
				}
				t.ptraceSiginfo.SetPID(int32(t.tg.pidns.tids[t]))
				t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
			} else {
				// Not seized: the ptrace code is the bare signal number and
				// no siginfo is recorded.
				t.ptraceCode = int32(sig)
				t.ptraceSiginfo = nil
			}
			if t.beginPtraceStopLocked() {
				tracer.signalStop(t, linux.CLD_STOPPED, int32(sig))
				// For consistency with Linux, if the parent and tracer are in the
				// same thread group, deduplicate notification signals.
				if notifyParent && tracer.tg == t.tg.leader.parent.tg {
					notifyParent = false
					tracer.tg.eventQueue.Notify(EventChildGroupStop | EventTraceeStop)
				} else {
					tracer.tg.eventQueue.Notify(EventTraceeStop)
				}
			}
		} else {
			// No tracer: re-take the signal mutex and enter a group stop,
			// unless t has been killed.
			t.tg.signalHandlers.mu.Lock()
			if !t.killedLocked() {
				t.beginInternalStopLocked((*groupStop)(nil))
			}
			t.tg.signalHandlers.mu.Unlock()
		}
		if notifyParent {
			t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig))
			t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
		}
		t.tg.pidns.owner.mu.RUnlock()

		return (*runInterrupt)(nil)
	}

	// Are there signals pending?
	if info := t.dequeueSignalLocked(t.signalMask); info != nil {
		t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())

		if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 {
			// Indicate that we've dequeued a stop signal before unlocking the
			// signal mutex; initiateGroupStop will check for races with
			// endGroupStopLocked after relocking it.
			t.tg.groupStopDequeued = true
		}
		if t.ptraceSignalLocked(info) {
			// Dequeueing the signal action must wait until after the
			// signal-delivery-stop ends since the tracer can change or
			// suppress the signal.
			t.tg.signalHandlers.mu.Unlock()
			return (*runInterruptAfterSignalDeliveryStop)(nil)
		}
		act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo))
		t.tg.signalHandlers.mu.Unlock()
		return t.deliverSignal(info, act)
	}

	// Nothing left to handle: clear the interrupted flag and resume the
	// application.
	t.unsetInterrupted()
	t.tg.signalHandlers.mu.Unlock()
	return (*runApp)(nil)
}

// The runInterruptAfterSignalDeliveryStop state is the one runInterrupt
// returns when t.ptraceSignalLocked reports true for a dequeued signal. Its
// execute method finishes handling that signal once the task runs again.
//
// +stateify savable
type runInterruptAfterSignalDeliveryStop struct{}

// execute completes the signal delivery that runInterrupt deferred and
// returns the next run state. t.ptraceCode is interpreted as the signal
// number to deliver:
//
//   - If it is not a valid signal, nothing is delivered and runInterrupt is
//     re-entered.
//   - If it differs from the signal in t.ptraceSiginfo, the siginfo is
//     rewritten as an SI_USER signal attributed to the tracer, or to t's
//     parent if there is no tracer.
//   - If the signal is blocked by t.signalMask, it is re-queued to t and
//     runInterrupt is re-entered; otherwise it is delivered via
//     t.deliverSignal.
//
// t.ptraceSiginfo is reset to nil on every return path.
//
// NOTE(review): presumably t.ptraceCode holds the signal selected by the
// tracer when it resumed t — confirm against the ptrace implementation.
func (*runInterruptAfterSignalDeliveryStop) execute(t *Task) taskRunState {
	t.tg.pidns.owner.mu.Lock()
	// Can't defer unlock: deliverSignal must be called without holding TaskSet
	// mutex.
	sig := linux.Signal(t.ptraceCode)
	// Drop the saved siginfo no matter how this function returns.
	defer func() {
		t.ptraceSiginfo = nil
	}()
	if !sig.IsValid() {
		// Not a deliverable signal number: deliver nothing and go back to
		// checking for other interrupt conditions.
		t.tg.pidns.owner.mu.Unlock()
		return (*runInterrupt)(nil)
	}
	// NOTE(review): info is dereferenced below without a nil check; this
	// assumes t.ptraceSiginfo is non-nil whenever sig is valid — confirm.
	info := t.ptraceSiginfo
	if sig != linux.Signal(info.Signo) {
		// The signal number no longer matches the saved siginfo, so rewrite
		// the siginfo to describe the new signal.
		info.Signo = int32(sig)
		info.Errno = 0
		info.Code = linux.SI_USER
		// pid isn't a valid field for all signal numbers, but Linux
		// doesn't care (kernel/signal.c:ptrace_signal()).
		//
		// Linux uses t->parent for the tid and uid here, which is the tracer
		// if it hasn't detached or the real parent otherwise.
		parent := t.parent
		if tracer := t.Tracer(); tracer != nil {
			parent = tracer
		}
		if parent == nil {
			// Tracer has detached and t was created by Kernel.CreateProcess().
			// Pretend the parent is in an ancestor PID + user namespace.
			info.SetPID(0)
			info.SetUID(int32(auth.OverflowUID))
		} else {
			info.SetPID(int32(t.tg.pidns.tids[parent]))
			info.SetUID(int32(parent.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
		}
	}
	// The signal mutex is acquired before the TaskSet mutex is released.
	t.tg.signalHandlers.mu.Lock()
	t.tg.pidns.owner.mu.Unlock()
	// If the signal is masked, re-queue it.
	if linux.SignalSetOf(sig)&t.signalMask != 0 {
		t.sendSignalLocked(info, false /* group */)
		t.tg.signalHandlers.mu.Unlock()
		return (*runInterrupt)(nil)
	}
	act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo))
	t.tg.signalHandlers.mu.Unlock()
	return t.deliverSignal(info, act)
}

// SignalRegister registers a waiter for pending signals.
//
// e is registered on t.signalQueue with event mask mask while the signal
// mutex is held.
func (t *Task) SignalRegister(e *waiter.Entry, mask waiter.EventMask) {
	t.tg.signalHandlers.mu.Lock()
	t.signalQueue.EventRegister(e, mask)
	t.tg.signalHandlers.mu.Unlock()
}

// SignalUnregister unregisters a waiter for pending signals.
//
// e is removed from t.signalQueue while the signal mutex is held.
func (t *Task) SignalUnregister(e *waiter.Entry) {
	t.tg.signalHandlers.mu.Lock()
	t.signalQueue.EventUnregister(e)
	t.tg.signalHandlers.mu.Unlock()
}