github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/systrap/sysmsg/sysmsg.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package sysmsg provides a stub signal handler and a communication protocol
    16  // between stub threads and the Sentry.
    17  //
    18  // Note that this package is allowlisted for use of sync/atomic.
    19  //
    20  // +checkalignedignore
    21  package sysmsg
    22  
    23  import (
    24  	"fmt"
    25  	"strings"
    26  	"sync/atomic"
    27  
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    30  )
    31  
    32  // LINT.IfChange
    33  // Per-thread stack layout:
    34  //
    35  // *------------*
    36  // | guard page |
    37  // |------------|
    38  // |            |
    39  // |  sysstack  |
    40  // |            |
    41  // *------------*
    42  // | guard page |
    43  // |------------|
    44  // |            |
    45  // |     ^      |
    46  // |    / \     |
    47  // |     |      |
    48  // |  altstack  |
    49  // |------------|
    50  // |   sysmsg   |
    51  // *------------*
    52  const (
    53  	// PerThreadMemSize is the size of a per-thread memory region.
    54  	PerThreadMemSize = 8 * hostarch.PageSize
    55  	// GuardSize is the size of an unmapped region which is placed right
    56  	// before the signal stack.
    57  	GuardSize                   = hostarch.PageSize
    58  	PerThreadPrivateStackOffset = GuardSize // Offset of the private stack: it starts right after one guard region.
    59  	PerThreadPrivateStackSize   = 2 * hostarch.PageSize // Size of the private stack (see StackAddrToSyshandlerStack).
    60  	// PerThreadSharedStackSize is the size of a per-thread stack region.
    61  	PerThreadSharedStackSize   = 4 * hostarch.PageSize
    62  	PerThreadSharedStackOffset = 4 * hostarch.PageSize // Offset of the shared stack region within the per-thread memory region.
    63  	// MsgOffsetFromSharedStack is the offset of the Msg structure from the
    64  	// start of the shared stack, i.e. Msg starts one page before the end of the per-thread memory region.
    65  	MsgOffsetFromSharedStack = PerThreadMemSize - hostarch.PageSize - PerThreadSharedStackOffset
    66  
    67  	// SpinningQueueMemSize is the size of a spinning queue memory region.
    68  	SpinningQueueMemSize = hostarch.PageSize
    69  )
    70  
    71  // StackAddrToMsg returns an address of a sysmsg structure.
    72  func StackAddrToMsg(sp uintptr) uintptr {
    73  	return sp + MsgOffsetFromSharedStack
    74  }
    75  
    76  // StackAddrToSyshandlerStack returns an address of a syshandler stack.
    77  func StackAddrToSyshandlerStack(sp uintptr) uintptr {
    78  	return sp + PerThreadPrivateStackOffset + PerThreadPrivateStackSize
    79  }
    80  
    81  // MsgToStackAddr returns a start address of a stack.
    82  func MsgToStackAddr(msg uintptr) uintptr {
    83  	return msg - MsgOffsetFromSharedStack
    84  }
    85  
    86  // ThreadState is used to store a state of the sysmsg thread.
    87  type ThreadState uint32
    88  
    89  // Set atomicaly sets the state value.
    90  func (s *ThreadState) Set(state ThreadState) {
    91  	atomic.StoreUint32((*uint32)(s), uint32(state))
    92  }
    93  
    94  // CompareAndSwap atomicaly compares and swaps the state value.
    95  func (s *ThreadState) CompareAndSwap(old, state ThreadState) bool {
    96  	return atomic.CompareAndSwapUint32((*uint32)(s), uint32(old), uint32(state))
    97  }
    98  
    99  // Get returns the current state value.
   100  //
   101  //go:nosplit
   102  func (s *ThreadState) Get() ThreadState {
   103  	return ThreadState(atomic.LoadUint32((*uint32)(s)))
   104  }
   105  
   106  const (
   107  	// ThreadStateNone means that the thread is executing the user workload.
   108  	ThreadStateNone ThreadState = iota
   109  	// ThreadStateDone means that the last event has been handled and the stub
   110  	// thread can be resumed.
   111  	ThreadStateDone
   112  	// ThreadStatePrep means that syshandler started filling the sysmsg struct.
   113  	ThreadStatePrep
   114  	// ThreadStateAsleep means that this thread fell asleep because there were
   115  	// not enough contexts to process in the context queue.
   116  	ThreadStateAsleep
   117  	// ThreadStateInitializing is only set once at sysmsg thread creation time. It
   118  	// is used to tell the signal handler that the thread does not yet have a
   119  	// context.
   120  	ThreadStateInitializing
   121  )
   122  
   123  // Msg contains the current state of the sysmsg thread.
   124  type Msg struct {
   125  	// The next batch of fields is used to call the syshandler stub
   126  	// function. A system call can be replaced with a function call. When
   127  	// a function call is executed, it can't change the current process
   128  	// stack, so it needs to save the stack and instruction registers, switch
   129  	// to its syshandler stack and jump to the syshandler
   130  	// address.
   131  	//
   132  	// Self is a pointer to itself in a process address space.
   133  	Self uint64
   134  	// RetAddr is a return address from the syshandler function.
   135  	RetAddr uint64
   136  	// Syshandler is an address of the syshandler function.
   137  	Syshandler uint64
   138  	// SyshandlerStack is an address of the thread syshandler stack.
   139  	SyshandlerStack uint64
   140  	// AppStack is a value of the stack register before calling the syshandler
   141  	// function.
   142  	AppStack uint64
   143  	// interrupt is non-zero if there is a postponed interrupt.
   144  	interrupt uint32
   145  	// State indicates to the sentry what the sysmsg thread is doing at a given
   146  	// moment.
   147  	State ThreadState
   148  	// Context is a pointer to the ThreadContext struct that the current sysmsg
   149  	// thread is processing.
   150  	Context uint64
   151  
   152  	// FaultJump is the size of a faulted instruction.
   153  	FaultJump int32
   154  	// Err is the error value with which the {sig|sys}handler crashes the stub
   155  	// thread (see sysmsg.h:__panic).
   156  	Err int32
   157  	// Line is the code line on which the {sig|sys}handler crashed the stub thread
   158  	// (see sysmsg.h:panic). Msg.Init resets it to -1.
   159  	Line int32
   160  	// Debug is a variable to use to get visibility into the stub from the sentry.
   161  	Debug uint64
   162  	// ThreadID is the ID of the sysmsg thread.
   163  	ThreadID uint32
   164  }
   165  
   166  // ContextState defines the reason the context has exited back to the sentry,
   167  // or ContextStateNone if running/ready-to-run.
   168  type ContextState uint32
   169  
   170  // Set atomicaly sets the state value.
   171  func (s *ContextState) Set(state ContextState) {
   172  	atomic.StoreUint32((*uint32)(s), uint32(state))
   173  }
   174  
   175  // Get returns the current state value.
   176  //
   177  //go:nosplit
   178  func (s *ContextState) Get() ContextState {
   179  	return ContextState(atomic.LoadUint32((*uint32)(s)))
   180  }
   181  
   182  // Context State types.
   183  const (
   184  	// ContextStateNone means that the context is either running in the user
   185  	// task or is ready to run in the user task.
   186  	ContextStateNone ContextState = iota
   187  	// ContextStateSyscall means that a syscall event is triggered from the
   188  	// sighandler.
   189  	ContextStateSyscall
   190  	// ContextStateFault means that there is a fault event that needs to be
   191  	// handled.
   192  	ContextStateFault
   193  	// ContextStateSyscallTrap means that a syscall event is triggered from
   194  	// a function call (syshandler).
   195  	ContextStateSyscallTrap
   196  	// ContextStateSyscallCanBePatched means that the syscall can be replaced
   197  	// with a function call.
   198  	ContextStateSyscallCanBePatched
   199  	// ContextStateInvalid is an invalid state that the sentry should never see.
   200  	ContextStateInvalid
   201  )
   202  
   203  const (
   204  	// MaxFPStateLen is the largest possible FPState that we will save.
   205  	// Note: This value was chosen to be able to fit ThreadContext into one page.
   206  	MaxFPStateLen uint32 = 3584
   207  
   208  	// AllocatedSizeofThreadContextStruct defines how much memory to allocate for
   209  	// one instance of ThreadContext.
   210  	// We over-allocate the memory for it because:
   211  	//   - The next instance needs to be aligned to 64 bytes for purposes of xsave.
   212  	//   - It's nice to align it to the page boundary.
   213  	AllocatedSizeofThreadContextStruct uintptr = 4096
   214  )
   215  
   216  // ThreadContext contains the current context of the sysmsg thread. The struct
   217  // facilitates switching contexts by allowing the sentry to switch pointers to
   218  // this struct as it needs to.
   219  type ThreadContext struct {
   220  	// FPState is a region of memory where:
   221  	//   - syshandler saves FPU state to using xsave/fxsave
   222  	//   - sighandler copies FPU state to from ucontext->uc_mcontext.fpregs
   223  	// Note that xsave requires this region of memory to be 64 byte aligned;
   224  	// therefore allocations of ThreadContext must be too.
   225  	FPState [MaxFPStateLen]byte
   226  	// FPStateChanged is non-zero when the stub thread needs to restore FPState
   227  	// because the sentry changed it. Init sets it to 1.
   228  	FPStateChanged uint64
   229  	// Regs is the context's GP register set. The {sig|sys}handler will save and
   230  	// restore the user app's registers here.
   231  	Regs linux.PtraceRegs
   232  
   233  	// SignalInfo is the siginfo struct.
   234  	SignalInfo linux.SignalInfo
   235  	// Signo is the signal that the stub is requesting the sentry to handle.
   236  	Signo int64
   237  	// State indicates the reason why the context has exited back to the sentry.
   238  	State ContextState
   239  	// Interrupt is set to indicate that this context has been interrupted.
   240  	Interrupt uint32
   241  	// ThreadID is the ID of the sysmsg thread that's currently working on the
   242  	// context.
   243  	ThreadID uint32
   244  	// LastThreadID is the ID of the previous sysmsg thread that ran the context
   245  	// (not the one currently working on it). This field is used by sysmsg threads
   246  	// to detect whether fpstate may have changed since the last time they ran a
   247  	// context.
   248  	LastThreadID uint32
   249  	// SentryFastPath is used to indicate to the stub thread that the sentry
   250  	// goroutine used for this thread context is busy-polling for a response
   251  	// instead of using FUTEX_WAIT.
   252  	SentryFastPath uint32
   253  	// Acked is used by sysmsg threads to signal to the sentry that this context
   254  	// has been picked up from the context queue and is actively being worked on.
   255  	Acked uint32
   256  	// TLS is a pointer to a thread local storage.
   257  	// It is only populated on ARM64.
   258  	TLS uint64
   259  	// Debug is a variable to use to get visibility into the stub from the sentry.
   260  	Debug uint64
   261  }
   262  
   263  // LINT.ThenChange(sysmsg.h)
   264  
   265  // Init initializes the message.
   266  func (m *Msg) Init(threadID uint32) {
   267  	m.Err = 0
   268  	m.Line = -1
   269  	m.ThreadID = threadID
   270  	m.Context = 0
   271  }
   272  
   273  // Init initializes the ThreadContext instance.
   274  func (c *ThreadContext) Init(initialThreadID uint32) {
   275  	c.FPStateChanged = 1
   276  	c.Regs = linux.PtraceRegs{}
   277  	c.Signo = 0
   278  	c.SignalInfo = linux.SignalInfo{}
   279  	c.State = ContextStateNone
   280  	c.ThreadID = initialThreadID
   281  }
   282  
   283  func (m *Msg) String() string {
   284  	var b strings.Builder
   285  	fmt.Fprintf(&b, "sysmsg.Msg{msg: %x state %d", m.Self, m.State)
   286  	fmt.Fprintf(&b, " err %x line %d debug %x", m.Err, m.Line, m.Debug)
   287  	fmt.Fprintf(&b, " app stack %x", m.AppStack)
   288  	fmt.Fprintf(&b, " context %x", m.Context)
   289  	fmt.Fprintf(&b, " ThreadID %d", m.ThreadID)
   290  	b.WriteString("}")
   291  
   292  	return b.String()
   293  }
   294  
   295  func (c *ThreadContext) String() string {
   296  	var b strings.Builder
   297  	fmt.Fprintf(&b, "sysmsg.ThreadContext{state %d", c.State.Get())
   298  	fmt.Fprintf(&b, " fault addr %x syscall %d", c.SignalInfo.Addr(), c.SignalInfo.Syscall())
   299  	fmt.Fprintf(&b, " ip %x sp %x", c.Regs.InstructionPointer(), c.Regs.StackPointer())
   300  	fmt.Fprintf(&b, " FPStateChanged %d Regs %+v", c.FPStateChanged, c.Regs)
   301  	fmt.Fprintf(&b, " Interrupt %d", c.Interrupt)
   302  	fmt.Fprintf(&b, " ThreadID %d LastThreadID %d", c.ThreadID, c.LastThreadID)
   303  	fmt.Fprintf(&b, " SentryFastPath %d Acked %d", c.SentryFastPath, c.Acked)
   304  	fmt.Fprintf(&b, " signo: %d, siginfo: %+v", c.Signo, c.SignalInfo)
   305  	fmt.Fprintf(&b, " debug %d", atomic.LoadUint64(&c.Debug))
   306  	b.WriteString("}")
   307  
   308  	return b.String()
   309  }