gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/platform/systrap/sysmsg/sysmsg.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package sysmsg provides a stub signal handler and a communication protocol 16 // between stub threads and the Sentry. 17 // 18 // Note that this package is allowlisted for use of sync/atomic. 19 // 20 // +checkalignedignore 21 package sysmsg 22 23 import ( 24 "fmt" 25 "strings" 26 "sync/atomic" 27 28 "golang.org/x/sys/unix" 29 "gvisor.dev/gvisor/pkg/abi/linux" 30 "gvisor.dev/gvisor/pkg/hostarch" 31 "gvisor.dev/gvisor/pkg/sentry/platform" 32 ) 33 34 // LINT.IfChange 35 // Per-thread stack layout: 36 // 37 // *------------* 38 // | guard page | 39 // |------------| 40 // | | 41 // | sysstack | 42 // | | 43 // *------------* 44 // | guard page | 45 // |------------| 46 // | | 47 // | ^ | 48 // | / \ | 49 // | | | 50 // | altstack | 51 // |------------| 52 // | sysmsg | 53 // *------------* 54 const ( 55 // PerThreadMemSize is the size of a per-thread memory region. 56 PerThreadMemSize = 8 * hostarch.PageSize 57 // GuardSize is the size of an unmapped region which is placed right 58 // before the signal stack. 59 GuardSize = hostarch.PageSize 60 PerThreadPrivateStackOffset = GuardSize 61 PerThreadPrivateStackSize = 2 * hostarch.PageSize 62 // PerThreadStackSharedSize is the size of a per-thread stack region. 63 PerThreadSharedStackSize = 4 * hostarch.PageSize 64 PerThreadSharedStackOffset = 4 * hostarch.PageSize 65 // MsgOffsetFromStack is the offset of the Msg structure on 66 // the thread stack. 67 MsgOffsetFromSharedStack = PerThreadMemSize - hostarch.PageSize - PerThreadSharedStackOffset 68 69 // SpinningQueueMemSize is the size of a spinning queue memory region. 70 SpinningQueueMemSize = hostarch.PageSize 71 ) 72 73 // StackAddrToMsg returns an address of a sysmsg structure. 74 func StackAddrToMsg(sp uintptr) uintptr { 75 return sp + MsgOffsetFromSharedStack 76 } 77 78 // StackAddrToSyshandlerStack returns an address of a syshandler stack. 79 func StackAddrToSyshandlerStack(sp uintptr) uintptr { 80 return sp + PerThreadPrivateStackOffset + PerThreadPrivateStackSize 81 } 82 83 // MsgToStackAddr returns a start address of a stack. 84 func MsgToStackAddr(msg uintptr) uintptr { 85 return msg - MsgOffsetFromSharedStack 86 } 87 88 // ThreadState is used to store a state of the sysmsg thread. 89 type ThreadState uint32 90 91 // Set atomicaly sets the state value. 92 func (s *ThreadState) Set(state ThreadState) { 93 atomic.StoreUint32((*uint32)(s), uint32(state)) 94 } 95 96 // CompareAndSwap atomicaly compares and swaps the state value. 97 func (s *ThreadState) CompareAndSwap(old, state ThreadState) bool { 98 return atomic.CompareAndSwapUint32((*uint32)(s), uint32(old), uint32(state)) 99 } 100 101 // Get returns the current state value. 102 // 103 //go:nosplit 104 func (s *ThreadState) Get() ThreadState { 105 return ThreadState(atomic.LoadUint32((*uint32)(s))) 106 } 107 108 const ( 109 // ThreadStateNone means that the thread is executing the user workload. 110 ThreadStateNone ThreadState = iota 111 // ThreadStateDone means that last event has been handled and the stub thread 112 // can be resumed. 113 ThreadStateDone 114 // ThreadStatePrep means that syshandler started filling the sysmsg struct. 115 ThreadStatePrep 116 // ThreadStateAsleep means that this thread fell asleep because there was not 117 // enough contexts to process in the context queue. 118 ThreadStateAsleep 119 // ThreadStateInitializing is only set once at sysmsg thread creation time. It 120 // is used to tell the signal handler that the thread does not yet have a 121 // context. 122 ThreadStateInitializing 123 ) 124 125 // Msg contains the current state of the sysmsg thread. 126 type Msg struct { 127 // The next batch of fields is used to call the syshandler stub 128 // function. A system call can be replaced with a function call. When 129 // a function call is executed, it can't change the current process 130 // stack, so it needs to save stack and instruction registers, switch 131 // on its syshandler stack and call the jmp instruction to the syshandler 132 // address. 133 // 134 // Self is a pointer to itself in a process address space. 135 Self uint64 136 // RetAddr is a return address from the syshandler function. 137 RetAddr uint64 138 // Syshandler is an address of the syshandler function. 139 Syshandler uint64 140 // SyshandlerStack is an address of the thread syshandler stack. 141 SyshandlerStack uint64 142 // AppStack is a value of the stack register before calling the syshandler 143 // function. 144 AppStack uint64 145 // interrupt is non-zero if there is a postponed interrupt. 146 interrupt uint32 147 // State indicates to the sentry what the sysmsg thread is doing at a given 148 // moment. 149 State ThreadState 150 // Context is a pointer to the ThreadContext struct that the current sysmsg 151 // thread is processing. 152 Context uint64 153 154 // FaultJump is the size of a faulted instruction. 155 FaultJump int32 156 // Err is the error value with which the {sig|sys}handler crashes the stub 157 // thread (see sysmsg.h:__panic). 158 Err int32 159 // ErrAdditional is an error value that gives additional information 160 // about the panic. 161 ErrAdditional int32 162 // Line is the code line on which the {sig|sys}handler crashed the stub thread 163 // (see sysmsg.h:panic). 164 Line int32 165 // Debug is a variable to use to get visibility into the stub from the sentry. 166 Debug uint64 167 // ThreadID is the ID of the sysmsg thread. 168 ThreadID uint32 169 } 170 171 // ContextState defines the reason the context has exited back to the sentry, 172 // or ContextStateNone if running/ready-to-run. 173 type ContextState uint32 174 175 // Set atomicaly sets the state value. 176 func (s *ContextState) Set(state ContextState) { 177 atomic.StoreUint32((*uint32)(s), uint32(state)) 178 } 179 180 // Get returns the current state value. 181 // 182 //go:nosplit 183 func (s *ContextState) Get() ContextState { 184 return ContextState(atomic.LoadUint32((*uint32)(s))) 185 } 186 187 // Context State types. 188 const ( 189 // ContextStateNone means that is either running in the user task or is ready 190 // to run in the user task. 191 ContextStateNone ContextState = iota 192 // ContextStateSyscall means that a syscall event is triggered from the 193 // sighandler. 194 ContextStateSyscall 195 // ContextStateFault means that there is a fault event that needs to be 196 // handled. 197 ContextStateFault 198 // ContextStateSyscallTrap means that a syscall event is triggered from 199 // a function call (syshandler). 200 ContextStateSyscallTrap 201 // ContextStateSyscallCanBePatched means that the syscall can be replaced 202 // with a function call. 203 ContextStateSyscallCanBePatched 204 // ContextStateInvalid is an invalid state that the sentry should never see. 205 ContextStateInvalid 206 ) 207 208 const ( 209 // MaxFPStateLen is the largest possible FPState that we will save. 210 // Note: This value was chosen to be able to fit ThreadContext into one page. 211 MaxFPStateLen uint32 = 3584 212 213 // AllocatedSizeofThreadContextStruct defines how much memory to allocate for 214 // one instance of ThreadContext. 215 // We over allocate the memory for it because: 216 // - The next instances needs to align to 64 bytes for purposes of xsave. 217 // - It's nice to align it to the page boundary. 218 AllocatedSizeofThreadContextStruct uintptr = 4096 219 ) 220 221 // ThreadContext contains the current context of the sysmsg thread. The struct 222 // facilitates switching contexts by allowing the sentry to switch pointers to 223 // this struct as it needs to. 224 type ThreadContext struct { 225 // FPState is a region of memory where: 226 // - syshandler saves FPU state to using xsave/fxsave 227 // - sighandler copies FPU state to from ucontext->uc_mcontext.fpregs 228 // Note that xsave requires this region of memory to be 64 byte aligned; 229 // therefore allocations of ThreadContext must be too. 230 FPState [MaxFPStateLen]byte 231 // FPStateChanged is set to true when the stub thread needs to restore FPState 232 // because the sentry changed it. 233 FPStateChanged uint64 234 // Regs is the context's GP register set. The {sig|sys}handler will save and 235 // restore the user app's registers here. 236 Regs linux.PtraceRegs 237 238 // SignalInfo is the siginfo struct. 239 SignalInfo linux.SignalInfo 240 // Signo is the signal that the stub is requesting the sentry to handle. 241 Signo int64 242 // State indicates the reason why the context has exited back to the sentry. 243 State ContextState 244 // Interrupt is set to indicate that this context has been interrupted. 245 Interrupt uint32 246 // ThreadID is the ID of the sysmsg thread that's currently working on the 247 // context. 248 ThreadID uint32 249 // LastThreadID is the ID of the previous sysmsg thread that ran the context 250 // (not the one currently working on it). This field is used by sysmsg threads 251 // to detect whether fpstate may have changed since the last time they ran a 252 // context. 253 LastThreadID uint32 254 // SentryFastPath is used to indicate to the stub thread that the sentry 255 // goroutine used for this thread context is busy-polling for a response 256 // instead of using FUTEX_WAIT. 257 SentryFastPath uint32 258 // AckedTime is used by sysmsg threads to signal to the sentry that this context 259 // has been picked up from the context queue and is actively being worked on. 260 // The stub thread puts down the timestamp at which it has started processing 261 // this context. 262 AckedTime uint64 263 // StateChangedTime is the time when the ThreadContext.State changed, as 264 // recorded by the stub thread when it gave it back to the sentry 265 // (the sentry does not populate this field except to reset it). 266 StateChangedTime uint64 267 // TLS is a pointer to a thread local storage. 268 // It is is only populated on ARM64. 269 TLS uint64 270 // Debug is a variable to use to get visibility into the stub from the sentry. 271 Debug uint64 272 } 273 274 // StubError are values that represent known stub-thread failure modes. 275 // Since these errors originate from the stub threads, look at 276 // sysmsg.h:stub_error. 277 type StubError int32 278 279 const ( 280 // StubErrorBadSysmsg indicates sysmsg->self did not match sysmsg. 281 StubErrorBadSysmsg StubError = 0x0bad0000 + iota 282 // StubErrorBadThreadState indicates sysmsg->state was invalid. 283 StubErrorBadThreadState 284 // StubErrorBadSpinningQueueDecref indicates stubs removed more threads 285 // from spinning queue than were put in. 286 StubErrorBadSpinningQueueDecref 287 // StubErrorArchPrctl indicates an error when calling arch_prctl. 288 StubErrorArchPrctl 289 // StubErrorFutex indicates an error when calling futex. 290 StubErrorFutex 291 // StubErrorBadContextID indicates a context received from the context 292 // queue was of unexpected value. 293 StubErrorBadContextID 294 // StubErrorFpStateBadHeader indicates that the floating point state 295 // header did not match the expected value. 296 StubErrorFpStateBadHeader 297 ) 298 299 // LINT.ThenChange(sysmsg.h) 300 301 // Init initializes the message. 302 func (m *Msg) Init(threadID uint32) { 303 m.Err = 0 304 m.ErrAdditional = 0 305 m.Line = -1 306 m.ThreadID = threadID 307 m.Context = 0 308 } 309 310 // Init initializes the ThreadContext instance. 311 func (c *ThreadContext) Init(initialThreadID uint32) { 312 c.FPStateChanged = 1 313 c.Regs = linux.PtraceRegs{} 314 c.Signo = 0 315 c.SignalInfo = linux.SignalInfo{} 316 c.State = ContextStateNone 317 c.ThreadID = initialThreadID 318 } 319 320 // ConvertSysmsgErr converts m.Err to platform.ContextError. 321 func (m *Msg) ConvertSysmsgErr() *platform.ContextError { 322 err := &platform.ContextError{ 323 Errno: unix.EPERM, 324 } 325 326 const prefix = "systrap stub thread failure:" 327 suffix := fmt.Sprintf("(failed on line %d; %s)", atomic.LoadInt32(&m.Line), m.String()) 328 switch StubError(atomic.LoadInt32(&m.Err)) { 329 case StubErrorBadSysmsg: 330 err.Err = fmt.Errorf("%s sysmsg->self did not match sysmsg during sig/sys-handler %s", prefix, suffix) 331 case StubErrorBadThreadState: 332 err.Err = fmt.Errorf("%s sysmsg->state was invalid during sys-handler %s", prefix, suffix) 333 case StubErrorBadSpinningQueueDecref: 334 err.Err = fmt.Errorf("%s imbalanced use of spinning queue %s", prefix, suffix) 335 case StubErrorArchPrctl: 336 err.Err = fmt.Errorf("%s arch_prctl error=0x%x %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix) 337 case StubErrorFutex: 338 err.Err = fmt.Errorf("%s futex error=0x%x %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix) 339 case StubErrorBadContextID: 340 err.Err = fmt.Errorf("%s unexpected context ID (%d) from context queue %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix) 341 case StubErrorFpStateBadHeader: 342 err.Err = fmt.Errorf("%s FP state context magic header (%d) does not match expected FPSIMD_MAGIC %s", prefix, atomic.LoadInt32(&m.ErrAdditional), suffix) 343 default: 344 err.Err = fmt.Errorf("%s unknown reason (0x%x) (possible shared memory corruption) %s", prefix, atomic.LoadInt32(&m.Err), suffix) 345 } 346 347 return err 348 } 349 350 func (m *Msg) String() string { 351 var b strings.Builder 352 fmt.Fprintf(&b, "sysmsg.Msg{msg: %x state %d", m.Self, m.State) 353 fmt.Fprintf(&b, " err %x line %d debug %x", m.Err, m.Line, m.Debug) 354 fmt.Fprintf(&b, " app stack %x", m.AppStack) 355 fmt.Fprintf(&b, " context %x", m.Context) 356 fmt.Fprintf(&b, " ThreadID %d", m.ThreadID) 357 b.WriteString("}") 358 359 return b.String() 360 } 361 362 func (c *ThreadContext) String() string { 363 var b strings.Builder 364 fmt.Fprintf(&b, "sysmsg.ThreadContext{state %d", c.State.Get()) 365 fmt.Fprintf(&b, " fault addr %x syscall %d", c.SignalInfo.Addr(), c.SignalInfo.Syscall()) 366 fmt.Fprintf(&b, " ip %x sp %x", c.Regs.InstructionPointer(), c.Regs.StackPointer()) 367 fmt.Fprintf(&b, " FPStateChanged %d Regs %+v", c.FPStateChanged, c.Regs) 368 fmt.Fprintf(&b, " Interrupt %d", c.Interrupt) 369 fmt.Fprintf(&b, " ThreadID %d LastThreadID %d", c.ThreadID, c.LastThreadID) 370 fmt.Fprintf(&b, " SentryFastPath %d Acked %d", c.SentryFastPath, c.AckedTime) 371 fmt.Fprintf(&b, " signo: %d, siginfo: %+v", c.Signo, c.SignalInfo) 372 fmt.Fprintf(&b, " debug %d", atomic.LoadUint64(&c.Debug)) 373 b.WriteString("}") 374 375 return b.String() 376 }