// Source: github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/task_signals.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

// This file defines the behavior of task signal handling.

import (
	"fmt"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
	"github.com/MerlinKodo/gvisor/pkg/eventchannel"
	"github.com/MerlinKodo/gvisor/pkg/hostarch"
	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
	ucspb "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
	"github.com/MerlinKodo/gvisor/pkg/waiter"
)

// SignalAction is an internal signal action: the kind of response that
// Task.deliverSignal makes to a dequeued signal.
type SignalAction int

// Available signal actions.
//
// Note that although we refer to the complete set internally, the application
// is only capable of using the Default and Ignore actions from the system
// call interface.
const (
	// SignalActionTerm terminates the thread group.
	SignalActionTerm SignalAction = iota
	// SignalActionCore is the "terminate and dump core" action. Within this
	// file it is handled identically to SignalActionTerm.
	SignalActionCore
	// SignalActionStop initiates a group stop.
	SignalActionStop
	// SignalActionIgnore discards the signal.
	SignalActionIgnore
	// SignalActionHandler invokes the user-configured signal handler.
	SignalActionHandler
)

// Default signal handler actions. Note that for most signals,
// (except SIGKILL and SIGSTOP) these can be overridden by the app.
//
// Signals that have no entry here (computeAction looks signals up without
// checking for presence) resolve to the zero SignalAction, which is
// SignalActionTerm.
var defaultActions = map[linux.Signal]SignalAction{
	// POSIX.1-1990 standard.
	linux.SIGHUP:  SignalActionTerm,
	linux.SIGINT:  SignalActionTerm,
	linux.SIGQUIT: SignalActionCore,
	linux.SIGILL:  SignalActionCore,
	linux.SIGABRT: SignalActionCore,
	linux.SIGFPE:  SignalActionCore,
	linux.SIGKILL: SignalActionTerm, // but see ThreadGroup.applySignalSideEffectsLocked
	linux.SIGSEGV: SignalActionCore,
	linux.SIGPIPE: SignalActionTerm,
	linux.SIGALRM: SignalActionTerm,
	linux.SIGTERM: SignalActionTerm,
	linux.SIGUSR1: SignalActionTerm,
	linux.SIGUSR2: SignalActionTerm,
	linux.SIGCHLD: SignalActionIgnore,
	linux.SIGCONT: SignalActionIgnore, // but see ThreadGroup.applySignalSideEffectsLocked
	linux.SIGSTOP: SignalActionStop,
	linux.SIGTSTP: SignalActionStop,
	linux.SIGTTIN: SignalActionStop,
	linux.SIGTTOU: SignalActionStop,
	// POSIX.1-2001 standard.
	linux.SIGBUS:    SignalActionCore,
	linux.SIGPROF:   SignalActionTerm,
	linux.SIGSYS:    SignalActionCore,
	linux.SIGTRAP:   SignalActionCore,
	linux.SIGURG:    SignalActionIgnore,
	linux.SIGVTALRM: SignalActionTerm,
	linux.SIGXCPU:   SignalActionCore,
	linux.SIGXFSZ:   SignalActionCore,
	// The rest on linux.
	linux.SIGSTKFLT: SignalActionTerm,
	linux.SIGIO:     SignalActionTerm,
	linux.SIGPWR:    SignalActionTerm,
	linux.SIGWINCH:  SignalActionIgnore,
}

// computeAction figures out what to do given a signal number
// and a linux.SigAction. SIGSTOP always results in a SignalActionStop,
// and SIGKILL always results in a SignalActionTerm.
// Signal 0 is always ignored as many programs use it for various internal functions
// and don't expect it to do anything.
//
// In the event the signal is not one of these, act.Handler determines what
// happens next.
// If act.Handler is:
//   - 0 (linux.SIG_DFL), the default action is taken;
//   - 1 (linux.SIG_IGN), the signal is ignored;
//   - anything else, the function returns SignalActionHandler.
99 func computeAction(sig linux.Signal, act linux.SigAction) SignalAction { 100 switch sig { 101 case linux.SIGSTOP: 102 return SignalActionStop 103 case linux.SIGKILL: 104 return SignalActionTerm 105 case linux.Signal(0): 106 return SignalActionIgnore 107 } 108 109 switch act.Handler { 110 case linux.SIG_DFL: 111 return defaultActions[sig] 112 case linux.SIG_IGN: 113 return SignalActionIgnore 114 default: 115 return SignalActionHandler 116 } 117 } 118 119 // UnblockableSignals contains the set of signals which cannot be blocked. 120 var UnblockableSignals = linux.MakeSignalSet(linux.SIGKILL, linux.SIGSTOP) 121 122 // StopSignals is the set of signals whose default action is SignalActionStop. 123 var StopSignals = linux.MakeSignalSet(linux.SIGSTOP, linux.SIGTSTP, linux.SIGTTIN, linux.SIGTTOU) 124 125 // dequeueSignalLocked returns a pending signal that is *not* included in mask. 126 // If there are no pending unmasked signals, dequeueSignalLocked returns nil. 127 // 128 // Preconditions: t.tg.signalHandlers.mu must be locked. 129 func (t *Task) dequeueSignalLocked(mask linux.SignalSet) *linux.SignalInfo { 130 if info := t.pendingSignals.dequeue(mask); info != nil { 131 return info 132 } 133 return t.tg.pendingSignals.dequeue(mask) 134 } 135 136 // discardSpecificLocked removes all instances of the given signal from all 137 // signal queues in tg. 138 // 139 // Preconditions: The signal mutex must be locked. 140 func (tg *ThreadGroup) discardSpecificLocked(sig linux.Signal) { 141 tg.pendingSignals.discardSpecific(sig) 142 for t := tg.tasks.Front(); t != nil; t = t.Next() { 143 t.pendingSignals.discardSpecific(sig) 144 } 145 } 146 147 // PendingSignals returns the set of pending signals. 
148 func (t *Task) PendingSignals() linux.SignalSet { 149 t.tg.pidns.owner.mu.RLock() 150 defer t.tg.pidns.owner.mu.RUnlock() 151 t.tg.signalHandlers.mu.Lock() 152 defer t.tg.signalHandlers.mu.Unlock() 153 return t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet 154 } 155 156 // deliverSignal delivers the given signal and returns the following run state. 157 func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState { 158 sig := linux.Signal(info.Signo) 159 sigact := computeAction(sig, act) 160 161 if t.haveSyscallReturn { 162 if sre, ok := linuxerr.SyscallRestartErrorFromReturn(t.Arch().Return()); ok { 163 // Signals that are ignored, cause a thread group stop, or 164 // terminate the thread group do not interact with interrupted 165 // syscalls; in Linux terms, they are never returned to the signal 166 // handling path from get_signal => get_signal_to_deliver. The 167 // behavior of an interrupted syscall is determined by the first 168 // signal that is actually handled (by userspace). 169 if sigact == SignalActionHandler { 170 switch { 171 case sre == linuxerr.ERESTARTNOHAND: 172 fallthrough 173 case sre == linuxerr.ERESTART_RESTARTBLOCK: 174 fallthrough 175 case (sre == linuxerr.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0): 176 t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo) 177 t.Arch().SetReturn(uintptr(-ExtractErrno(linuxerr.EINTR, -1))) 178 default: 179 t.Debugf("Restarting syscall %d: interrupted by signal %d", t.Arch().SyscallNo(), info.Signo) 180 t.Arch().RestartSyscall() 181 } 182 } 183 } 184 } 185 186 switch sigact { 187 case SignalActionTerm, SignalActionCore: 188 // "Default action is to terminate the process." - signal(7) 189 190 // Emit an event channel messages related to this uncaught signal. 
191 ucs := &ucspb.UncaughtSignal{ 192 Tid: int32(t.Kernel().TaskSet().Root.IDOfTask(t)), 193 Pid: int32(t.Kernel().TaskSet().Root.IDOfThreadGroup(t.ThreadGroup())), 194 Registers: t.Arch().StateData().Proto(), 195 SignalNumber: info.Signo, 196 } 197 198 // Attach an fault address if appropriate. 199 switch sig { 200 case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS: 201 ucs.FaultAddr = info.Addr() 202 } 203 204 t.Debugf("Signal %d, PID: %d, TID: %d, fault addr: %#x: terminating thread group", ucs.Pid, ucs.Tid, ucs.FaultAddr, info.Signo) 205 eventchannel.Emit(ucs) 206 207 t.PrepareGroupExit(linux.WaitStatusTerminationSignal(sig)) 208 return (*runExit)(nil) 209 210 case SignalActionStop: 211 // "Default action is to stop the process." 212 t.initiateGroupStop(info) 213 214 case SignalActionIgnore: 215 // "Default action is to ignore the signal." 216 t.Debugf("Signal %d: ignored", info.Signo) 217 218 case SignalActionHandler: 219 // Try to deliver the signal to the user-configured handler. 220 t.Debugf("Signal %d: delivering to handler", info.Signo) 221 if err := t.deliverSignalToHandler(info, act); err != nil { 222 // This is not a warning, it can occur during normal operation. 223 t.Debugf("Failed to deliver signal %+v to user handler: %v", info, err) 224 225 // Send a forced SIGSEGV. If the signal that couldn't be delivered 226 // was a SIGSEGV, force the handler to SIG_DFL. 227 t.forceSignal(linux.SIGSEGV, sig == linux.SIGSEGV /* unconditional */) 228 t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) 229 } 230 231 default: 232 panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(sig, act))) 233 } 234 return (*runInterrupt)(nil) 235 } 236 237 // deliverSignalToHandler changes the task's userspace state to enter the given 238 // user-configured handler for the given signal. 
func (t *Task) deliverSignalToHandler(info *linux.SignalInfo, act linux.SigAction) error {
	// Signal delivery to an application handler interrupts restartable
	// sequences.
	t.rseqInterrupt()

	// Are we executing on the main stack, or the provided alternate stack?
	sp := hostarch.Addr(t.Arch().Stack())

	// N.B. This is a *copy* of the alternate stack that the user's signal
	// handler expects to see in its ucontext (even if it's not in use).
	alt := t.signalStack
	if act.Flags&linux.SA_ONSTACK != 0 && alt.IsEnabled() {
		alt.Flags |= linux.SS_ONSTACK
		// Only switch to the top of the alternate stack if we are not
		// already running somewhere inside it.
		if !alt.Contains(sp) {
			sp = hostarch.Addr(alt.Top())
		}
	}

	mm := t.MemoryManager()
	// Set up the signal handler. If we have a saved signal mask, the signal
	// handler should run with the current mask, but sigreturn should restore
	// the saved one.
	st := &arch.Stack{
		Arch:   t.Arch(),
		IO:     mm,
		Bottom: sp,
	}
	mask := linux.SignalSet(t.signalMask.Load())
	if t.haveSavedSignalMask {
		mask = t.savedSignalMask
	}

	// Set up the restorer.
	// x86-64 should always use SA_RESTORER, but this flag is optional on other platforms.
	// Please see the linux code as reference:
	// linux/arch/x86/kernel/signal.c:__setup_rt_frame()
	// If SA_RESTORER is not configured, we can use the sigreturn trampolines
	// the vdso provides instead.
	// Please see the linux code as reference:
	// linux/arch/arm64/kernel/signal.c:setup_return()
	if act.Flags&linux.SA_RESTORER == 0 {
		act.Restorer = mm.VDSOSigReturn()
	}

	if err := t.Arch().SignalSetup(st, &act, info, &alt, mask, t.k.featureSet); err != nil {
		return err
	}
	t.p.FullStateChanged()
	// The saved mask (if any) has been handed to SignalSetup above, so it is
	// consumed here.
	t.haveSavedSignalMask = false

	// Add our signal mask: the handler runs with act.Mask blocked and, unless
	// SA_NODEFER is set, with the delivered signal itself blocked as well.
	newMask := linux.SignalSet(t.signalMask.Load()) | act.Mask
	if act.Flags&linux.SA_NODEFER == 0 {
		newMask |= linux.SignalSetOf(linux.Signal(info.Signo))
	}
	t.SetSignalMask(newMask)

	return nil
}

// ctrlResume is the SyscallControl returned by a successful SignalReturn. It
// has ignoreReturn set.
var ctrlResume = &SyscallControl{ignoreReturn: true}

// SignalReturn implements sigreturn(2) (if rt is false) or rt_sigreturn(2) (if
// rt is true).
//
// On success the signal stack and signal mask recovered from the signal frame
// are reinstated and ctrlResume is returned. If the frame cannot be restored,
// a forced SIGSEGV is sent to t and the restore error is returned.
func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) {
	st := t.Stack()
	sigset, alt, err := t.Arch().SignalRestore(st, rt, t.k.featureSet)
	if err != nil {
		// sigreturn syscalls never return errors.
		t.Debugf("failed to restore from a signal frame: %v", err)
		t.forceSignal(linux.SIGSEGV, false /* unconditional */)
		t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
		return nil, err
	}

	// Attempt to record the given signal stack. Note that we silently
	// ignore failures here, as does Linux. Only an EFAULT may be
	// generated, but SignalRestore has already deserialized the entire
	// frame successfully.
	t.SetSignalStack(alt)

	// Restore our signal mask. SIGKILL and SIGSTOP should not be blocked.
	t.SetSignalMask(sigset &^ UnblockableSignals)
	t.p.FullStateChanged()

	return ctrlResume, nil
}

// Sigtimedwait implements the semantics of sigtimedwait(2).
//
// It returns a pending signal from set if one is available or arrives before
// timeout elapses. A zero timeout, or an expired one, yields linuxerr.EAGAIN;
// any other error from blocking is returned unchanged.
//
// Preconditions:
//   - The caller must be running on the task goroutine.
//   - t.exitState < TaskExitZombie.
func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.SignalInfo, error) {
	// set is the set of signals we're interested in; invert it to get the set
	// of signals to block.
	mask := ^(set &^ UnblockableSignals)

	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	if info := t.dequeueSignalLocked(mask); info != nil {
		return info, nil
	}

	if timeout == 0 {
		return nil, linuxerr.EAGAIN
	}

	// Unblock signals we're waiting for. Remember the original signal mask so
	// that Task.sendSignalTimerLocked doesn't discard ignored signals that
	// we're temporarily unblocking.
	t.realSignalMask = linux.SignalSet(t.signalMask.RacyLoad())
	t.setSignalMaskLocked(t.realSignalMask & mask)

	// Wait for a timeout or new signal. The signal mutex is dropped while
	// blocking and re-acquired afterwards, so the deferred Unlock above stays
	// balanced.
	t.tg.signalHandlers.mu.Unlock()
	_, err := t.BlockWithTimeout(nil, true, timeout)
	t.tg.signalHandlers.mu.Lock()

	// Restore the original signal mask.
	t.setSignalMaskLocked(t.realSignalMask)
	t.realSignalMask = 0

	// A signal that arrived while blocked takes precedence over whatever
	// error ended the wait.
	if info := t.dequeueSignalLocked(mask); info != nil {
		return info, nil
	}
	if err == linuxerr.ETIMEDOUT {
		return nil, linuxerr.EAGAIN
	}
	return nil, err
}

// SendSignal sends the given signal to t.
//
// The following errors may be returned:
//
//	linuxerr.ESRCH - The task has exited.
//	linuxerr.EINVAL - The signal is not valid.
//	linuxerr.EAGAIN - The signal is realtime, and cannot be queued.
func (t *Task) SendSignal(info *linux.SignalInfo) error {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	return t.sendSignalLocked(info, false /* group */)
}

// SendGroupSignal sends the given signal to t's thread group.
//
// It differs from SendSignal only in that the signal is queued as a group
// signal; the same errors may be returned.
func (t *Task) SendGroupSignal(info *linux.SignalInfo) error {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	return t.sendSignalLocked(info, true /* group */)
}

// SendSignal sends the given signal to tg, using tg's leader to determine if
// the signal is blocked.
func (tg *ThreadGroup) SendSignal(info *linux.SignalInfo) error {
	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()
	tg.signalHandlers.mu.Lock()
	defer tg.signalHandlers.mu.Unlock()
	return tg.leader.sendSignalLocked(info, true /* group */)
}

// sendSignalLocked is sendSignalTimerLocked for signals that are not
// associated with an IntervalTimer.
func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error {
	return t.sendSignalTimerLocked(info, group, nil)
}

// sendSignalTimerLocked queues info either on t (group is false) or on t's
// thread group (group is true), then interrupts a task that can receive it.
// If timer is non-nil and the signal is discarded rather than queued,
// timer.signalRejectedLocked is called.
//
// It returns linuxerr.ESRCH if t is dead, linuxerr.EINVAL if the signal number
// is invalid, and linuxerr.EAGAIN if a realtime signal could not be enqueued.
// Signal 0, ignored signals and duplicate non-realtime signals all return nil
// without being queued.
//
// NOTE(review): the callers visible in this file hold both the TaskSet mutex
// and the signal mutex, or document them as preconditions - confirm for other
// callers.
func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error {
	if t.exitState == TaskExitDead {
		return linuxerr.ESRCH
	}
	sig := linux.Signal(info.Signo)
	if sig == 0 {
		return nil
	}
	if !sig.IsValid() {
		return linuxerr.EINVAL
	}

	// Signal side effects apply even if the signal is ultimately discarded.
	t.tg.applySignalSideEffectsLocked(sig)

	// TODO: "Only signals for which the "init" process has established a
	// signal handler can be sent to the "init" process by other members of the
	// PID namespace. This restriction applies even to privileged processes,
	// and prevents other members of the PID namespace from accidentally
	// killing the "init" process." - pid_namespaces(7). We don't currently do
	// this for child namespaces, though we should; we also don't do this for
	// the root namespace (the same restriction applies to global init on
	// Linux), where whether or not we should is much murkier. In practice,
	// most sandboxed applications are not prepared to function as an init
	// process.

	// Unmasked, ignored signals are discarded without being queued, unless
	// they will be visible to a tracer. Even for group signals, it's the
	// originally-targeted task's signal mask and tracer that matter; compare
	// Linux's kernel/signal.c:__send_signal() => prepare_signal() =>
	// sig_ignored().
	//
	// t.realSignalMask is consulted as well so that signals temporarily
	// unblocked by Sigtimedwait are not discarded here.
	ignored := computeAction(sig, t.tg.signalHandlers.actions[sig]) == SignalActionIgnore
	if sigset := linux.SignalSetOf(sig); sigset&linux.SignalSet(t.signalMask.RacyLoad()) == 0 && sigset&t.realSignalMask == 0 && ignored && !t.hasTracer() {
		t.Debugf("Discarding ignored signal %d", sig)
		if timer != nil {
			timer.signalRejectedLocked()
		}
		return nil
	}

	// Task-directed signals go to t's own queue, group-directed signals to the
	// thread group's shared queue.
	q := &t.pendingSignals
	if group {
		q = &t.tg.pendingSignals
	}
	if !q.enqueue(info, timer) {
		if sig.IsRealtime() {
			return linuxerr.EAGAIN
		}
		t.Debugf("Discarding duplicate signal %d", sig)
		if timer != nil {
			timer.signalRejectedLocked()
		}
		return nil
	}

	// Find a receiver to notify. Note that the task we choose to notify, if
	// any, may not be the task that actually dequeues and handles the signal;
	// e.g. a racing signal mask change may cause the notified task to become
	// ineligible, or a racing sibling task may dequeue the signal first.
	if t.canReceiveSignalLocked(sig) {
		t.Debugf("Notified of signal %d", sig)
		t.interrupt()
		return nil
	}
	if group {
		if nt := t.tg.findSignalReceiverLocked(sig); nt != nil {
			nt.Debugf("Notified of group signal %d", sig)
			nt.interrupt()
			return nil
		}
	}
	t.Debugf("No task notified of signal %d", sig)
	return nil
}

// applySignalSideEffectsLocked applies the effects that sending sig has on tg
// at send time, regardless of whether the signal is later queued or delivered:
// stop signals discard pending SIGCONTs, SIGCONT ends any group stop, and
// SIGKILL marks the thread group as exiting and kills every task in it.
//
// NOTE(review): the "Locked" suffix and the callees used here suggest the
// signal mutex must be held - confirm against callers.
func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) {
	switch {
	case linux.SignalSetOf(sig)&StopSignals != 0:
		// Stop signals cause all prior SIGCONT to be discarded. (This is
		// despite the fact this has little effect since SIGCONT's most
		// important effect is applied when the signal is sent in the branch
		// below, not when the signal is delivered.)
		tg.discardSpecificLocked(linux.SIGCONT)
	case sig == linux.SIGCONT:
		// "The SIGCONT signal has a side effect of waking up (all threads of)
		// a group-stopped process. This side effect happens before
		// signal-delivery-stop. The tracer can't suppress this side effect (it
		// can only suppress signal injection, which only causes the SIGCONT
		// handler to not be executed in the tracee, if such a handler is
		// installed." - ptrace(2)
		tg.endGroupStopLocked(true)
	case sig == linux.SIGKILL:
		// "SIGKILL does not generate signal-delivery-stop and therefore the
		// tracer can't suppress it. SIGKILL kills even within system calls
		// (syscall-exit-stop is not generated prior to death by SIGKILL)." -
		// ptrace(2)
		//
		// Note that this differs from ThreadGroup.requestExit in that it
		// ignores tg.execing.
		if !tg.exiting {
			tg.exiting = true
			tg.exitStatus = linux.WaitStatusTerminationSignal(linux.SIGKILL)
		}
		for t := tg.tasks.Front(); t != nil; t = t.Next() {
			t.killLocked()
		}
	}
}

// canReceiveSignalLocked returns true if t should be interrupted to receive
// the given signal. canReceiveSignalLocked is analogous to Linux's
// kernel/signal.c:wants_signal(), but see below for divergences.
//
// As a side effect, t.signalQueue is notified of sig on every call, whether or
// not t is eligible.
//
// Preconditions: The signal mutex must be locked.
func (t *Task) canReceiveSignalLocked(sig linux.Signal) bool {
	// Notify that the signal is queued.
	t.signalQueue.Notify(waiter.EventMask(linux.MakeSignalSet(sig)))

	// - Do not choose tasks that are blocking the signal.
	if linux.SignalSetOf(sig)&linux.SignalSet(t.signalMask.RacyLoad()) != 0 {
		return false
	}
	// - No need to check Task.exitState, as the exit path sets every bit in the
	// signal mask when it transitions from TaskExitNone to TaskExitInitiated.
	// - No special case for SIGKILL: SIGKILL already interrupted all tasks in the
	// task group via applySignalSideEffectsLocked => killLocked.
	// - Do not choose stopped tasks, which cannot handle signals.
	if t.stop != nil {
		return false
	}
	// - Do not choose tasks that have already been interrupted, as they may be
	// busy handling another signal.
	if len(t.interruptChan) != 0 {
		return false
	}
	return true
}

// findSignalReceiverLocked returns a task in tg that should be interrupted to
// receive the given signal. If no such task exists, findSignalReceiverLocked
// returns nil.
//
// Tasks are scanned in list order and the first eligible one is chosen.
// Linux actually records curr_target to balance the group signal targets.
//
// Preconditions: The signal mutex must be locked.
func (tg *ThreadGroup) findSignalReceiverLocked(sig linux.Signal) *Task {
	for t := tg.tasks.Front(); t != nil; t = t.Next() {
		if t.canReceiveSignalLocked(sig) {
			return t
		}
	}
	return nil
}

// forceSignal ensures that the task is not ignoring or blocking the given
// signal. If unconditional is true, forceSignal takes action even if the
// signal isn't being ignored or blocked.
func (t *Task) forceSignal(sig linux.Signal, unconditional bool) {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.forceSignalLocked(sig, unconditional)
}

// forceSignalLocked is the implementation of forceSignal. When it takes action
// it resets the thread group's handler for sig to SIG_DFL and, if t was
// blocking sig, removes sig from t's signal mask.
//
// NOTE(review): forceSignal calls this with the TaskSet mutex read-locked and
// the signal mutex locked - presumably the required preconditions; confirm.
func (t *Task) forceSignalLocked(sig linux.Signal, unconditional bool) {
	blocked := linux.SignalSetOf(sig)&linux.SignalSet(t.signalMask.RacyLoad()) != 0
	act := t.tg.signalHandlers.actions[sig]
	ignored := act.Handler == linux.SIG_IGN
	if blocked || ignored || unconditional {
		act.Handler = linux.SIG_DFL
		t.tg.signalHandlers.actions[sig] = act
		if blocked {
			t.setSignalMaskLocked(linux.SignalSet(t.signalMask.RacyLoad()) &^ linux.SignalSetOf(sig))
		}
	}
}

// SignalMask returns a copy of t's signal mask.
func (t *Task) SignalMask() linux.SignalSet {
	return linux.SignalSet(t.signalMask.Load())
}

// SetSignalMask sets t's signal mask.
//
// Preconditions:
//   - The caller must be running on the task goroutine.
//   - t.exitState < TaskExitZombie.
func (t *Task) SetSignalMask(mask linux.SignalSet) {
	// By precondition, t prevents t.tg from completing an execve and mutating
	// t.tg.signalHandlers, so we can skip the TaskSet mutex.
	t.tg.signalHandlers.mu.Lock()
	t.setSignalMaskLocked(mask)
	t.tg.signalHandlers.mu.Unlock()
}

// setSignalMaskLocked stores mask as t's signal mask and compensates for the
// change: newly blocked group signals may need a different receiver, and newly
// unblocked pending signals require t to interrupt itself.
//
// Preconditions: The signal mutex must be locked.
func (t *Task) setSignalMaskLocked(mask linux.SignalSet) {
	oldMask := linux.SignalSet(t.signalMask.RacyLoad())
	t.signalMask.Store(uint64(mask))

	// If the new mask blocks any signals that were not blocked by the old
	// mask, and at least one such signal is pending in tg.pendingSignals, and
	// t has been woken, it could be the case that t was woken to handle that
	// signal, but will no longer do so as a result of its new signal mask, so
	// we have to pick a replacement.
	blocked := mask &^ oldMask
	blockedGroupPending := blocked & t.tg.pendingSignals.pendingSet
	if blockedGroupPending != 0 && t.interrupted() {
		linux.ForEachSignal(blockedGroupPending, func(sig linux.Signal) {
			if nt := t.tg.findSignalReceiverLocked(sig); nt != nil {
				nt.interrupt()
				// This return only leaves the closure for the current
				// signal.
				return
			}
		})
	}

	// Conversely, if the new mask unblocks any signals that were blocked by
	// the old mask, and at least one such signal is pending, we may now need
	// to handle that signal.
	unblocked := oldMask &^ mask
	unblockedPending := unblocked & (t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet)
	if unblockedPending != 0 {
		t.interruptSelf()
	}
}

// SetSavedSignalMask sets the saved signal mask (see Task.savedSignalMask's
// comment).
//
// Preconditions: The caller must be running on the task goroutine.
641 func (t *Task) SetSavedSignalMask(mask linux.SignalSet) { 642 t.savedSignalMask = mask 643 t.haveSavedSignalMask = true 644 } 645 646 // SignalStack returns the task-private signal stack. 647 // 648 // By precondition, a full state has to be pulled. 649 func (t *Task) SignalStack() linux.SignalStack { 650 alt := t.signalStack 651 if t.onSignalStack(alt) { 652 alt.Flags |= linux.SS_ONSTACK 653 } 654 return alt 655 } 656 657 // SigaltStack implements the sigaltstack syscall. 658 func (t *Task) SigaltStack(setaddr hostarch.Addr, oldaddr hostarch.Addr) (*SyscallControl, error) { 659 if err := t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch()); err != nil { 660 t.PrepareGroupExit(linux.WaitStatusTerminationSignal(linux.SIGILL)) 661 return CtrlDoExit, linuxerr.EFAULT 662 } 663 664 alt := t.SignalStack() 665 if oldaddr != 0 { 666 if _, err := alt.CopyOut(t, oldaddr); err != nil { 667 return nil, err 668 } 669 } 670 if setaddr != 0 { 671 if _, err := alt.CopyIn(t, setaddr); err != nil { 672 return nil, err 673 } 674 // The signal stack cannot be changed if the task is currently 675 // on the stack. This is enforced at the lowest level because 676 // these semantics apply to changing the signal stack via a 677 // ucontext during a signal handler. 678 if !t.SetSignalStack(alt) { 679 return nil, linuxerr.EPERM 680 } 681 } 682 return nil, nil 683 } 684 685 // onSignalStack returns true if the task is executing on the given signal stack. 686 func (t *Task) onSignalStack(alt linux.SignalStack) bool { 687 sp := hostarch.Addr(t.Arch().Stack()) 688 return alt.Contains(sp) 689 } 690 691 // SetSignalStack sets the task-private signal stack. 692 // 693 // This value may not be changed if the task is currently executing on the 694 // signal stack, i.e. if t.onSignalStack returns true. In this case, this 695 // function will return false. Otherwise, true is returned. 
func (t *Task) SetSignalStack(alt linux.SignalStack) bool {
	// Check that we're not executing on the stack.
	if t.onSignalStack(t.signalStack) {
		return false
	}

	if alt.Flags&linux.SS_DISABLE != 0 {
		// Don't record anything beyond the flags.
		t.signalStack = linux.SignalStack{
			Flags: linux.SS_DISABLE,
		}
	} else {
		// Mask out irrelevant parts: only disable matters.
		alt.Flags &= linux.SS_DISABLE
		t.signalStack = alt
	}
	return true
}

// SetSigAction atomically sets the thread group's signal action for signal sig
// to *actptr (if actptr is not nil) and returns the old signal action.
//
// linuxerr.EINVAL is returned if sig is not a valid signal, or if actptr is
// non-nil and sig is SIGKILL or SIGSTOP. In the latter case the old action is
// still returned alongside the error.
func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) {
	if !sig.IsValid() {
		return linux.SigAction{}, linuxerr.EINVAL
	}

	tg.pidns.owner.mu.RLock()
	defer tg.pidns.owner.mu.RUnlock()
	sh := tg.signalHandlers
	sh.mu.Lock()
	defer sh.mu.Unlock()
	oldact := sh.actions[sig]
	if actptr != nil {
		if sig == linux.SIGKILL || sig == linux.SIGSTOP {
			return oldact, linuxerr.EINVAL
		}

		// Work on a copy so that the caller's SigAction is not modified when
		// the unblockable signals are stripped from its mask.
		act := *actptr
		act.Mask &^= UnblockableSignals
		sh.actions[sig] = act
		// From POSIX, by way of Linux:
		//
		// "Setting a signal action to SIG_IGN for a signal that is pending
		// shall cause the pending signal to be discarded, whether or not it is
		// blocked."
		//
		// "Setting a signal action to SIG_DFL for a signal that is pending and
		// whose default action is to ignore the signal (for example, SIGCHLD),
		// shall cause the pending signal to be discarded, whether or not it is
		// blocked."
		if computeAction(sig, act) == SignalActionIgnore {
			tg.discardSpecificLocked(sig)
		}
	}
	return oldact, nil
}

// groupStop is a TaskStop placed on tasks that have received a stop signal
// (SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU). (The term "group-stop" originates from
// the ptrace man page.)
//
// +stateify savable
type groupStop struct{}

// Killable implements TaskStop.Killable.
func (*groupStop) Killable() bool { return true }

// initiateGroupStop attempts to initiate a group stop based on a
// previously-dequeued stop signal.
//
// The attempt is abandoned (with a debug log) if t already has a group stop
// pending, if the stop was cancelled by a racing SIGCONT, or if the thread
// group is exiting or execing. Otherwise every task that is neither killed nor
// exiting is marked as having a group stop pending and is interrupted.
//
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) initiateGroupStop(info *linux.SignalInfo) {
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	if t.groupStopPending {
		t.Debugf("Signal %d: not stopping thread group: lost to racing stop signal", info.Signo)
		return
	}
	// endGroupStopLocked clears groupStopDequeued, so a false value here means
	// the stop was cancelled after the stop signal was dequeued.
	if !t.tg.groupStopDequeued {
		t.Debugf("Signal %d: not stopping thread group: lost to racing SIGCONT", info.Signo)
		return
	}
	if t.tg.exiting {
		t.Debugf("Signal %d: not stopping thread group: lost to racing group exit", info.Signo)
		return
	}
	if t.tg.execing != nil {
		t.Debugf("Signal %d: not stopping thread group: lost to racing execve", info.Signo)
		return
	}
	// Only record the stop signal if a group stop has not already completed.
	if !t.tg.groupStopComplete {
		t.tg.groupStopSignal = linux.Signal(info.Signo)
	}
	// Recount the tasks that still have to acknowledge the stop.
	t.tg.groupStopPendingCount = 0
	for t2 := t.tg.tasks.Front(); t2 != nil; t2 = t2.Next() {
		if t2.killedLocked() || t2.exitState >= TaskExitInitiated {
			t2.groupStopPending = false
			continue
		}
		t2.groupStopPending = true
		t2.groupStopAcknowledged = false
		if t2.ptraceSeized {
			t2.trapNotifyPending = true
			if s, ok := t2.stop.(*ptraceStop); ok && s.listen {
				t2.endInternalStopLocked()
			}
		}
		t2.interrupt()
		t.tg.groupStopPendingCount++
	}
	t.Debugf("Signal %d: stopping %d threads in thread group", info.Signo, t.tg.groupStopPendingCount)
}

// endGroupStopLocked ensures that all prior stop signals received by tg are
// not stopping tg and will not stop tg in the future. If broadcast is true,
// parent and tracer notification will be scheduled if appropriate.
//
// Preconditions: The signal mutex must be locked.
func (tg *ThreadGroup) endGroupStopLocked(broadcast bool) {
	// Discard all previously-queued stop signals.
	linux.ForEachSignal(StopSignals, tg.discardSpecificLocked)

	// Nothing else to undo if no group stop is in progress or complete. Note
	// that groupStopDequeued is left untouched on this path.
	if tg.groupStopPendingCount == 0 && !tg.groupStopComplete {
		return
	}

	completeStr := "incomplete"
	if tg.groupStopComplete {
		completeStr = "complete"
	}
	tg.leader.Debugf("Ending %s group stop with %d threads pending", completeStr, tg.groupStopPendingCount)
	for t := tg.tasks.Front(); t != nil; t = t.Next() {
		t.groupStopPending = false
		if t.ptraceSeized {
			t.trapNotifyPending = true
			if s, ok := t.stop.(*ptraceStop); ok && s.listen {
				t.endInternalStopLocked()
			}
		} else {
			// Tasks that are not ptrace-seized are only resumed if they are
			// actually in a group stop.
			if _, ok := t.stop.(*groupStop); ok {
				t.endInternalStopLocked()
			}
		}
	}
	if broadcast {
		// Instead of notifying the parent here, set groupContNotify so that
		// one of the continuing tasks does so. (Linux does something similar.)
		// The reason we do this is to keep locking sane. In order to send a
		// signal to the parent, we need to lock its signal mutex, but we're
		// already holding tg's signal mutex, and the TaskSet mutex must be
		// locked for writing for us to hold two signal mutexes. Since we don't
		// want to require this for endGroupStopLocked (which is called from
		// signal-sending paths), nor do we want to lose atomicity by releasing
		// the mutexes we're already holding, just let the continuing thread
		// group deal with it.
		tg.groupContNotify = true
		tg.groupContInterrupted = !tg.groupStopComplete
		tg.groupContWaitable = true
	}
	// Unsetting groupStopDequeued will cause racing calls to initiateGroupStop
	// to recognize that the group stop has been cancelled.
	tg.groupStopDequeued = false
	tg.groupStopSignal = 0
	tg.groupStopPendingCount = 0
	tg.groupStopComplete = false
	tg.groupStopWaitable = false
}

// participateGroupStopLocked is called to handle thread group side effects
// after t unsets t.groupStopPending. The caller must handle task side effects
// (e.g. placing the task goroutine into the group stop). It returns true if
// the caller must notify t.tg.leader's parent of a completed group stop (which
// participateGroupStopLocked cannot do due to holding the wrong locks).
//
// Preconditions: The signal mutex must be locked.
func (t *Task) participateGroupStopLocked() bool {
	// Each task acknowledges a given group stop at most once.
	if t.groupStopAcknowledged {
		return false
	}
	t.groupStopAcknowledged = true
	t.tg.groupStopPendingCount--
	// Other tasks have yet to acknowledge; the stop is not complete.
	if t.tg.groupStopPendingCount != 0 {
		return false
	}
	// The group stop was already complete, so there is nothing new to report.
	if t.tg.groupStopComplete {
		return false
	}
	t.Debugf("Completing group stop")
	t.tg.groupStopComplete = true
	t.tg.groupStopWaitable = true
	// A completed stop supersedes any continue notification that has not yet
	// been delivered.
	t.tg.groupContNotify = false
	t.tg.groupContWaitable = false
	return true
}

// signalStop sends a signal to t's thread group of a new group stop, group
// continue, or ptrace stop, if appropriate. code and status are set in the
// signal sent to tg, if any.
//
// Preconditions: The TaskSet mutex must be locked (for reading or writing).
898 func (t *Task) signalStop(target *Task, code int32, status int32) { 899 t.tg.signalHandlers.mu.Lock() 900 defer t.tg.signalHandlers.mu.Unlock() 901 act, ok := t.tg.signalHandlers.actions[linux.SIGCHLD] 902 if !ok || (act.Handler != linux.SIG_IGN && act.Flags&linux.SA_NOCLDSTOP == 0) { 903 sigchld := &linux.SignalInfo{ 904 Signo: int32(linux.SIGCHLD), 905 Code: code, 906 } 907 sigchld.SetPID(int32(t.tg.pidns.tids[target])) 908 sigchld.SetUID(int32(target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow())) 909 sigchld.SetStatus(status) 910 // TODO(b/72102453): Set utime, stime. 911 t.sendSignalLocked(sigchld, true /* group */) 912 } 913 } 914 915 // The runInterrupt state handles conditions indicated by interrupts. 916 // 917 // +stateify savable 918 type runInterrupt struct{} 919 920 func (*runInterrupt) execute(t *Task) taskRunState { 921 // Interrupts are de-duplicated (t.unsetInterrupted() will undo the effect 922 // of all previous calls to t.interrupted() regardless of how many such 923 // calls there have been), so early exits from this function must re-enter 924 // the runInterrupt state to check for more interrupt-signaled conditions. 925 926 t.tg.signalHandlers.mu.Lock() 927 928 // Did we just leave a group stop? 929 if t.tg.groupContNotify { 930 t.tg.groupContNotify = false 931 sig := t.tg.groupStopSignal 932 intr := t.tg.groupContInterrupted 933 t.tg.signalHandlers.mu.Unlock() 934 t.tg.pidns.owner.mu.RLock() 935 // For consistency with Linux, if the parent and (thread group 936 // leader's) tracer are in the same thread group, deduplicate 937 // notifications. 938 notifyParent := t.tg.leader.parent != nil 939 if tracer := t.tg.leader.Tracer(); tracer != nil { 940 if notifyParent && tracer.tg == t.tg.leader.parent.tg { 941 notifyParent = false 942 } 943 // Sending CLD_STOPPED to the tracer doesn't really make any sense; 944 // the thread group leader may have already entered the stop and 945 // notified its tracer accordingly. 
But it's consistent with 946 // Linux... 947 if intr { 948 tracer.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig)) 949 if !notifyParent { 950 tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop | EventChildGroupStop) 951 } else { 952 tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop) 953 } 954 } else { 955 tracer.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig)) 956 tracer.tg.eventQueue.Notify(EventGroupContinue) 957 } 958 } 959 if notifyParent { 960 // If groupContInterrupted, do as Linux does and pretend the group 961 // stop completed just before it ended. The theoretical behavior in 962 // this case would be to send a SIGCHLD indicating the completed 963 // stop, followed by a SIGCHLD indicating the continue. However, 964 // SIGCHLD is a standard signal, so the latter would always be 965 // dropped. Hence sending only the former is equivalent. 966 if intr { 967 t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig)) 968 t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue | EventChildGroupStop) 969 } else { 970 t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_CONTINUED, int32(sig)) 971 t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue) 972 } 973 } 974 t.tg.pidns.owner.mu.RUnlock() 975 return (*runInterrupt)(nil) 976 } 977 978 // Do we need to enter a group stop or related ptrace stop? This path is 979 // analogous to Linux's kernel/signal.c:get_signal() => do_signal_stop() 980 // (with ptrace enabled) and do_jobctl_trap(). 981 if t.groupStopPending || t.trapStopPending || t.trapNotifyPending { 982 sig := t.tg.groupStopSignal 983 notifyParent := false 984 if t.groupStopPending { 985 t.groupStopPending = false 986 // We care about t.tg.groupStopSignal (for tracer notification) 987 // even if this doesn't complete a group stop, so keep the 988 // value of sig we've already read. 
989 notifyParent = t.participateGroupStopLocked() 990 } 991 t.trapStopPending = false 992 t.trapNotifyPending = false 993 // Drop the signal mutex so we can take the TaskSet mutex. 994 t.tg.signalHandlers.mu.Unlock() 995 996 t.tg.pidns.owner.mu.RLock() 997 if t.tg.leader.parent == nil { 998 notifyParent = false 999 } 1000 if tracer := t.Tracer(); tracer != nil { 1001 if t.ptraceSeized { 1002 if sig == 0 { 1003 sig = linux.SIGTRAP 1004 } 1005 // "If tracee was attached using PTRACE_SEIZE, group-stop is 1006 // indicated by PTRACE_EVENT_STOP: status>>16 == 1007 // PTRACE_EVENT_STOP. This allows detection of group-stops 1008 // without requiring an extra PTRACE_GETSIGINFO call." - 1009 // "Group-stop", ptrace(2) 1010 t.ptraceCode = int32(sig) | linux.PTRACE_EVENT_STOP<<8 1011 t.ptraceSiginfo = &linux.SignalInfo{ 1012 Signo: int32(sig), 1013 Code: t.ptraceCode, 1014 } 1015 t.ptraceSiginfo.SetPID(int32(t.tg.pidns.tids[t])) 1016 t.ptraceSiginfo.SetUID(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow())) 1017 } else { 1018 t.ptraceCode = int32(sig) 1019 t.ptraceSiginfo = nil 1020 } 1021 if t.beginPtraceStopLocked() { 1022 tracer.signalStop(t, linux.CLD_STOPPED, int32(sig)) 1023 // For consistency with Linux, if the parent and tracer are in the 1024 // same thread group, deduplicate notification signals. 
1025 if notifyParent && tracer.tg == t.tg.leader.parent.tg { 1026 notifyParent = false 1027 tracer.tg.eventQueue.Notify(EventChildGroupStop | EventTraceeStop) 1028 } else { 1029 tracer.tg.eventQueue.Notify(EventTraceeStop) 1030 } 1031 } 1032 } else { 1033 t.tg.signalHandlers.mu.Lock() 1034 if !t.killedLocked() { 1035 t.beginInternalStopLocked((*groupStop)(nil)) 1036 } 1037 t.tg.signalHandlers.mu.Unlock() 1038 } 1039 if notifyParent { 1040 t.tg.leader.parent.signalStop(t.tg.leader, linux.CLD_STOPPED, int32(sig)) 1041 t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop) 1042 } 1043 t.tg.pidns.owner.mu.RUnlock() 1044 1045 return (*runInterrupt)(nil) 1046 } 1047 1048 // Are there signals pending? 1049 if info := t.dequeueSignalLocked(linux.SignalSet(t.signalMask.RacyLoad())); info != nil { 1050 if err := t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch()); err != nil { 1051 t.PrepareGroupExit(linux.WaitStatusTerminationSignal(linux.SIGILL)) 1052 return (*runExit)(nil) 1053 } 1054 1055 if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 { 1056 // Indicate that we've dequeued a stop signal before unlocking the 1057 // signal mutex; initiateGroupStop will check for races with 1058 // endGroupStopLocked after relocking it. 1059 t.tg.groupStopDequeued = true 1060 } 1061 if t.ptraceSignalLocked(info) { 1062 // Dequeueing the signal action must wait until after the 1063 // signal-delivery-stop ends since the tracer can change or 1064 // suppress the signal. 
1065 t.tg.signalHandlers.mu.Unlock() 1066 return (*runInterruptAfterSignalDeliveryStop)(nil) 1067 } 1068 act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo)) 1069 t.tg.signalHandlers.mu.Unlock() 1070 return t.deliverSignal(info, act) 1071 } 1072 1073 t.unsetInterrupted() 1074 t.tg.signalHandlers.mu.Unlock() 1075 return (*runApp)(nil) 1076 } 1077 1078 // +stateify savable 1079 type runInterruptAfterSignalDeliveryStop struct{} 1080 1081 func (*runInterruptAfterSignalDeliveryStop) execute(t *Task) taskRunState { 1082 t.tg.pidns.owner.mu.Lock() 1083 // Can't defer unlock: deliverSignal must be called without holding TaskSet 1084 // mutex. 1085 sig := linux.Signal(t.ptraceCode) 1086 defer func() { 1087 t.ptraceSiginfo = nil 1088 }() 1089 if !sig.IsValid() { 1090 t.tg.pidns.owner.mu.Unlock() 1091 return (*runInterrupt)(nil) 1092 } 1093 info := t.ptraceSiginfo 1094 if sig != linux.Signal(info.Signo) { 1095 info.Signo = int32(sig) 1096 info.Errno = 0 1097 info.Code = linux.SI_USER 1098 // pid isn't a valid field for all signal numbers, but Linux 1099 // doesn't care (kernel/signal.c:ptrace_signal()). 1100 // 1101 // Linux uses t->parent for the tid and uid here, which is the tracer 1102 // if it hasn't detached or the real parent otherwise. 1103 parent := t.parent 1104 if tracer := t.Tracer(); tracer != nil { 1105 parent = tracer 1106 } 1107 if parent == nil { 1108 // Tracer has detached and t was created by Kernel.CreateProcess(). 1109 // Pretend the parent is in an ancestor PID + user namespace. 1110 info.SetPID(0) 1111 info.SetUID(int32(auth.OverflowUID)) 1112 } else { 1113 info.SetPID(int32(t.tg.pidns.tids[parent])) 1114 info.SetUID(int32(parent.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow())) 1115 } 1116 } 1117 t.tg.signalHandlers.mu.Lock() 1118 t.tg.pidns.owner.mu.Unlock() 1119 // If the signal is masked, re-queue it. 
1120 if linux.SignalSetOf(sig)&linux.SignalSet(t.signalMask.RacyLoad()) != 0 { 1121 t.sendSignalLocked(info, false /* group */) 1122 t.tg.signalHandlers.mu.Unlock() 1123 return (*runInterrupt)(nil) 1124 } 1125 act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo)) 1126 t.tg.signalHandlers.mu.Unlock() 1127 return t.deliverSignal(info, act) 1128 } 1129 1130 // SignalRegister registers a waiter for pending signals. 1131 func (t *Task) SignalRegister(e *waiter.Entry) { 1132 t.tg.signalHandlers.mu.Lock() 1133 t.signalQueue.EventRegister(e) 1134 t.tg.signalHandlers.mu.Unlock() 1135 } 1136 1137 // SignalUnregister unregisters a waiter for pending signals. 1138 func (t *Task) SignalUnregister(e *waiter.Entry) { 1139 t.tg.signalHandlers.mu.Lock() 1140 t.signalQueue.EventUnregister(e) 1141 t.tg.signalHandlers.mu.Unlock() 1142 }