github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/systrap/sysmsg/sysmsg.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package sysmsg provides a stub signal handler and a communication protocol 16 // between stub threads and the Sentry. 17 // 18 // Note that this package is allowlisted for use of sync/atomic. 19 // 20 // +checkalignedignore 21 package sysmsg 22 23 import ( 24 "fmt" 25 "strings" 26 "sync/atomic" 27 28 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 29 "github.com/MerlinKodo/gvisor/pkg/hostarch" 30 ) 31 32 // LINT.IfChange 33 // Per-thread stack layout: 34 // 35 // *------------* 36 // | guard page | 37 // |------------| 38 // | | 39 // | sysstack | 40 // | | 41 // *------------* 42 // | guard page | 43 // |------------| 44 // | | 45 // | ^ | 46 // | / \ | 47 // | | | 48 // | altstack | 49 // |------------| 50 // | sysmsg | 51 // *------------* 52 const ( 53 // PerThreadMemSize is the size of a per-thread memory region. 54 PerThreadMemSize = 8 * hostarch.PageSize 55 // GuardSize is the size of an unmapped region which is placed right 56 // before the signal stack. 57 GuardSize = hostarch.PageSize 58 PerThreadPrivateStackOffset = GuardSize 59 PerThreadPrivateStackSize = 2 * hostarch.PageSize 60 // PerThreadStackSharedSize is the size of a per-thread stack region. 61 PerThreadSharedStackSize = 4 * hostarch.PageSize 62 PerThreadSharedStackOffset = 4 * hostarch.PageSize 63 // MsgOffsetFromStack is the offset of the Msg structure on 64 // the thread stack. 65 MsgOffsetFromSharedStack = PerThreadMemSize - hostarch.PageSize - PerThreadSharedStackOffset 66 67 // SpinningQueueMemSize is the size of a spinning queue memory region. 68 SpinningQueueMemSize = hostarch.PageSize 69 ) 70 71 // StackAddrToMsg returns an address of a sysmsg structure. 72 func StackAddrToMsg(sp uintptr) uintptr { 73 return sp + MsgOffsetFromSharedStack 74 } 75 76 // StackAddrToSyshandlerStack returns an address of a syshandler stack. 77 func StackAddrToSyshandlerStack(sp uintptr) uintptr { 78 return sp + PerThreadPrivateStackOffset + PerThreadPrivateStackSize 79 } 80 81 // MsgToStackAddr returns a start address of a stack. 82 func MsgToStackAddr(msg uintptr) uintptr { 83 return msg - MsgOffsetFromSharedStack 84 } 85 86 // ThreadState is used to store a state of the sysmsg thread. 87 type ThreadState uint32 88 89 // Set atomicaly sets the state value. 90 func (s *ThreadState) Set(state ThreadState) { 91 atomic.StoreUint32((*uint32)(s), uint32(state)) 92 } 93 94 // CompareAndSwap atomicaly compares and swaps the state value. 95 func (s *ThreadState) CompareAndSwap(old, state ThreadState) bool { 96 return atomic.CompareAndSwapUint32((*uint32)(s), uint32(old), uint32(state)) 97 } 98 99 // Get returns the current state value. 100 // 101 //go:nosplit 102 func (s *ThreadState) Get() ThreadState { 103 return ThreadState(atomic.LoadUint32((*uint32)(s))) 104 } 105 106 const ( 107 // ThreadStateNone means that the thread is executing the user workload. 108 ThreadStateNone ThreadState = iota 109 // ThreadStateDone means that last event has been handled and the stub thread 110 // can be resumed. 111 ThreadStateDone 112 // ThreadStatePrep means that syshandler started filling the sysmsg struct. 113 ThreadStatePrep 114 // ThreadStateAsleep means that this thread fell asleep because there was not 115 // enough contexts to process in the context queue. 116 ThreadStateAsleep 117 // ThreadStateInitializing is only set once at sysmsg thread creation time. It 118 // is used to tell the signal handler that the thread does not yet have a 119 // context. 120 ThreadStateInitializing 121 ) 122 123 // Msg contains the current state of the sysmsg thread. 124 type Msg struct { 125 // The next batch of fields is used to call the syshandler stub 126 // function. A system call can be replaced with a function call. When 127 // a function call is executed, it can't change the current process 128 // stack, so it needs to save stack and instruction registers, switch 129 // on its syshandler stack and call the jmp instruction to the syshandler 130 // address. 131 // 132 // Self is a pointer to itself in a process address space. 133 Self uint64 134 // RetAddr is a return address from the syshandler function. 135 RetAddr uint64 136 // Syshandler is an address of the syshandler function. 137 Syshandler uint64 138 // SyshandlerStack is an address of the thread syshandler stack. 139 SyshandlerStack uint64 140 // AppStack is a value of the stack register before calling the syshandler 141 // function. 142 AppStack uint64 143 // interrupt is non-zero if there is a postponed interrupt. 144 interrupt uint32 145 // State indicates to the sentry what the sysmsg thread is doing at a given 146 // moment. 147 State ThreadState 148 // Context is a pointer to the ThreadContext struct that the current sysmsg 149 // thread is processing. 150 Context uint64 151 152 // FaultJump is the size of a faulted instruction. 153 FaultJump int32 154 // Err is the error value with which the {sig|sys}handler crashes the stub 155 // thread (see sysmsg.h:__panic). 156 Err int32 157 // Line is the code line on which the {sig|sys}handler crashed the stub thread 158 // (see sysmsg.h:panic). 159 Line int32 160 // Debug is a variable to use to get visibility into the stub from the sentry. 161 Debug uint64 162 // ThreadID is the ID of the sysmsg thread. 163 ThreadID uint32 164 } 165 166 // ContextState defines the reason the context has exited back to the sentry, 167 // or ContextStateNone if running/ready-to-run. 168 type ContextState uint32 169 170 // Set atomicaly sets the state value. 171 func (s *ContextState) Set(state ContextState) { 172 atomic.StoreUint32((*uint32)(s), uint32(state)) 173 } 174 175 // Get returns the current state value. 176 // 177 //go:nosplit 178 func (s *ContextState) Get() ContextState { 179 return ContextState(atomic.LoadUint32((*uint32)(s))) 180 } 181 182 // Context State types. 183 const ( 184 // ContextStateNone means that is either running in the user task or is ready 185 // to run in the user task. 186 ContextStateNone ContextState = iota 187 // ContextStateSyscall means that a syscall event is triggered from the 188 // sighandler. 189 ContextStateSyscall 190 // ContextStateFault means that there is a fault event that needs to be 191 // handled. 192 ContextStateFault 193 // ContextStateSyscallTrap means that a syscall event is triggered from 194 // a function call (syshandler). 195 ContextStateSyscallTrap 196 // ContextStateSyscallCanBePatched means that the syscall can be replaced 197 // with a function call. 198 ContextStateSyscallCanBePatched 199 // ContextStateInvalid is an invalid state that the sentry should never see. 200 ContextStateInvalid 201 ) 202 203 const ( 204 // MaxFPStateLen is the largest possible FPState that we will save. 205 // Note: This value was chosen to be able to fit ThreadContext into one page. 206 MaxFPStateLen uint32 = 3584 207 208 // AllocatedSizeofThreadContextStruct defines how much memory to allocate for 209 // one instance of ThreadContext. 210 // We over allocate the memory for it because: 211 // - The next instances needs to align to 64 bytes for purposes of xsave. 212 // - It's nice to align it to the page boundary. 213 AllocatedSizeofThreadContextStruct uintptr = 4096 214 ) 215 216 // ThreadContext contains the current context of the sysmsg thread. The struct 217 // facilitates switching contexts by allowing the sentry to switch pointers to 218 // this struct as it needs to. 219 type ThreadContext struct { 220 // FPState is a region of memory where: 221 // - syshandler saves FPU state to using xsave/fxsave 222 // - sighandler copies FPU state to from ucontext->uc_mcontext.fpregs 223 // Note that xsave requires this region of memory to be 64 byte aligned; 224 // therefore allocations of ThreadContext must be too. 225 FPState [MaxFPStateLen]byte 226 // FPStateChanged is set to true when the stub thread needs to restore FPState 227 // because the sentry changed it. 228 FPStateChanged uint64 229 // Regs is the context's GP register set. The {sig|sys}handler will save and 230 // restore the user app's registers here. 231 Regs linux.PtraceRegs 232 233 // SignalInfo is the siginfo struct. 234 SignalInfo linux.SignalInfo 235 // Signo is the signal that the stub is requesting the sentry to handle. 236 Signo int64 237 // State indicates the reason why the context has exited back to the sentry. 238 State ContextState 239 // Interrupt is set to indicate that this context has been interrupted. 240 Interrupt uint32 241 // ThreadID is the ID of the sysmsg thread that's currently working on the 242 // context. 243 ThreadID uint32 244 // LastThreadID is the ID of the previous sysmsg thread that ran the context 245 // (not the one currently working on it). This field is used by sysmsg threads 246 // to detect whether fpstate may have changed since the last time they ran a 247 // context. 248 LastThreadID uint32 249 // SentryFastPath is used to indicate to the stub thread that the sentry 250 // goroutine used for this thread context is busy-polling for a response 251 // instead of using FUTEX_WAIT. 252 SentryFastPath uint32 253 // Acked is used by sysmsg threads to signal to the sentry that this context 254 // has been picked up from the context queue and is actively being worked on. 255 Acked uint32 256 // TLS is a pointer to a thread local storage. 257 // It is is only populated on ARM64. 258 TLS uint64 259 // Debug is a variable to use to get visibility into the stub from the sentry. 260 Debug uint64 261 } 262 263 // LINT.ThenChange(sysmsg.h) 264 265 // Init initializes the message. 266 func (m *Msg) Init(threadID uint32) { 267 m.Err = 0 268 m.Line = -1 269 m.ThreadID = threadID 270 m.Context = 0 271 } 272 273 // Init initializes the ThreadContext instance. 274 func (c *ThreadContext) Init(initialThreadID uint32) { 275 c.FPStateChanged = 1 276 c.Regs = linux.PtraceRegs{} 277 c.Signo = 0 278 c.SignalInfo = linux.SignalInfo{} 279 c.State = ContextStateNone 280 c.ThreadID = initialThreadID 281 } 282 283 func (m *Msg) String() string { 284 var b strings.Builder 285 fmt.Fprintf(&b, "sysmsg.Msg{msg: %x state %d", m.Self, m.State) 286 fmt.Fprintf(&b, " err %x line %d debug %x", m.Err, m.Line, m.Debug) 287 fmt.Fprintf(&b, " app stack %x", m.AppStack) 288 fmt.Fprintf(&b, " context %x", m.Context) 289 fmt.Fprintf(&b, " ThreadID %d", m.ThreadID) 290 b.WriteString("}") 291 292 return b.String() 293 } 294 295 func (c *ThreadContext) String() string { 296 var b strings.Builder 297 fmt.Fprintf(&b, "sysmsg.ThreadContext{state %d", c.State.Get()) 298 fmt.Fprintf(&b, " fault addr %x syscall %d", c.SignalInfo.Addr(), c.SignalInfo.Syscall()) 299 fmt.Fprintf(&b, " ip %x sp %x", c.Regs.InstructionPointer(), c.Regs.StackPointer()) 300 fmt.Fprintf(&b, " FPStateChanged %d Regs %+v", c.FPStateChanged, c.Regs) 301 fmt.Fprintf(&b, " Interrupt %d", c.Interrupt) 302 fmt.Fprintf(&b, " ThreadID %d LastThreadID %d", c.ThreadID, c.LastThreadID) 303 fmt.Fprintf(&b, " SentryFastPath %d Acked %d", c.SentryFastPath, c.Acked) 304 fmt.Fprintf(&b, " signo: %d, siginfo: %+v", c.Signo, c.SignalInfo) 305 fmt.Fprintf(&b, " debug %d", atomic.LoadUint64(&c.Debug)) 306 b.WriteString("}") 307 308 return b.String() 309 }