github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/systrap/syscall_thread.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package systrap
    16  
    17  import (
    18  	"fmt"
    19  	"sync/atomic"
    20  
    21  	"golang.org/x/sys/unix"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/pgalloc"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform/systrap/sysmsg"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/usage"
    28  )
    29  
// syscallThreadMessageSize is the size of the syscall message, which consists
// of the sentry message and the stub message, one page each.
const syscallThreadMessageSize = hostarch.PageSize * 2
    32  
// syscallThread implements the process of calling syscalls in a stub process.
//
// Each syscall thread owns a shared memory region to communicate with the
// Sentry. This region consists of two pages. The first page, called
// sentryMessage, is mapped as read-only in the stub address space. The second
// page, called stubMessage, is mapped as read-write in the stub process.
//
// Any memory region that is mapped as read-write in a stub address space can
// be changed by user code. This means that we can't trust the content of
// stubMessage, but it is used to receive a syscall return code. Therefore
// syscallThread can be used only in these cases:
//   - If a system call never fails (e.g. munmap).
//   - If a system call has to return only one known value or, if it fails,
//     it doesn't reveal any data (e.g. mmap).
type syscallThread struct {
	// subproc is a link to the subprocess which is used to call native
	// system calls and track when a sysmsg thread has to be recreated.
	// Look at getSysmsgThread() for more details.
	subproc *subprocess

	// thread is the stub thread used to execute system calls; its
	// sysmsgStackID selects the address of the message in the stub.
	thread *thread

	// stackRange is the range for the sentry syscall message in the memory
	// file.
	stackRange memmap.FileRange

	// sentryAddr is the address of the shared memory region in the Sentry
	// address space. It is zero until the region has been mapped.
	sentryAddr uintptr
	// stubAddr is the address of the shared memory region in the stub
	// address space.
	stubAddr uintptr

	// sentryMessage is the first page of the shared message that can't be
	// modified by the stub thread.
	sentryMessage *syscallSentryMessage
	// stubMessage is the second page of the shared message that can be
	// modified by the stub thread.
	stubMessage *syscallStubMessage
}
    74  
    75  func (t *syscallThread) init() error {
    76  	// Allocate a new shared memory message.
    77  	opts := pgalloc.AllocOpts{
    78  		Kind: usage.System,
    79  		Dir:  pgalloc.TopDown,
    80  	}
    81  	fr, err := t.subproc.memoryFile.Allocate(syscallThreadMessageSize, opts)
    82  	if err != nil {
    83  		return err
    84  	}
    85  
    86  	t.stackRange = fr
    87  	t.stubAddr = stubSysmsgStack + sysmsg.PerThreadMemSize*uintptr(t.thread.sysmsgStackID)
    88  	err = t.mapMessageIntoStub()
    89  	if err != nil {
    90  		t.destroy()
    91  		return err
    92  	}
    93  
    94  	// Map the stack into the sentry.
    95  	sentryAddr, _, errno := unix.RawSyscall6(
    96  		unix.SYS_MMAP,
    97  		0,
    98  		syscallThreadMessageSize,
    99  		unix.PROT_WRITE|unix.PROT_READ,
   100  		unix.MAP_SHARED|unix.MAP_FILE,
   101  		uintptr(t.subproc.memoryFile.FD()), uintptr(fr.Start))
   102  	if errno != 0 {
   103  		t.destroy()
   104  		return fmt.Errorf("mmap failed: %v", errno)
   105  	}
   106  	t.sentryAddr = sentryAddr
   107  
   108  	t.initRequestReplyAddresses(sentryAddr)
   109  	return nil
   110  }
   111  
   112  func (t *syscallThread) destroy() {
   113  	if t.sentryAddr != 0 {
   114  		_, _, errno := unix.RawSyscall6(
   115  			unix.SYS_MUNMAP,
   116  			t.sentryAddr,
   117  			syscallThreadMessageSize,
   118  			0, 0, 0, 0)
   119  		if errno != 0 {
   120  			panic(fmt.Sprintf("mumap failed: %v", errno))
   121  		}
   122  	}
   123  	if t.stubAddr != 0 {
   124  		_, err := t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MUNMAP,
   125  			arch.SyscallArgument{Value: t.stubAddr},
   126  			arch.SyscallArgument{Value: uintptr(syscallThreadMessageSize)})
   127  		if err != nil {
   128  			panic(fmt.Sprintf("munmap failed: %v", err))
   129  		}
   130  	}
   131  	t.subproc.memoryFile.DecRef(t.stackRange)
   132  	t.subproc.sysmsgStackPool.Put(t.thread.sysmsgStackID)
   133  }
   134  
   135  // mapMessageIntoStub maps the syscall message into the stub process address space.
   136  func (t *syscallThread) mapMessageIntoStub() error {
   137  	// Map sentryMessage as read-only.
   138  	_, err := t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MMAP,
   139  		arch.SyscallArgument{Value: t.stubAddr},
   140  		arch.SyscallArgument{Value: uintptr(hostarch.PageSize)},
   141  		arch.SyscallArgument{Value: uintptr(unix.PROT_READ)},
   142  		arch.SyscallArgument{Value: unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED},
   143  		arch.SyscallArgument{Value: uintptr(t.subproc.memoryFile.FD())},
   144  		arch.SyscallArgument{Value: uintptr(t.stackRange.Start)})
   145  	if err != nil {
   146  		return err
   147  	}
   148  	// Map stubMessage as read-write.
   149  	_, err = t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MMAP,
   150  		arch.SyscallArgument{Value: t.stubAddr + syscallStubMessageOffset},
   151  		arch.SyscallArgument{Value: uintptr(hostarch.PageSize)},
   152  		arch.SyscallArgument{Value: uintptr(unix.PROT_READ | unix.PROT_WRITE)},
   153  		arch.SyscallArgument{Value: unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED},
   154  		arch.SyscallArgument{Value: uintptr(t.subproc.memoryFile.FD())},
   155  		arch.SyscallArgument{Value: uintptr(t.stackRange.Start + hostarch.PageSize)})
   156  	return err
   157  }
   158  
// attach attaches to the stub thread with ptrace and unblocks signals.
func (t *syscallThread) attach() {
	t.thread.attach()
	// We need to unblock signals, because the TRAP signal is used to run
	// syscalls via ptrace.
	t.unmaskAllSignalsAttached()
}
   166  
   167  func (t *syscallThread) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintptr, error) {
   168  	sentryMsg := t.sentryMessage
   169  	stubMsg := t.stubMessage
   170  	sentryMsg.sysno = uint64(sysno)
   171  	for i := 0; i < len(sentryMsg.args); i++ {
   172  		if i < len(args) {
   173  			sentryMsg.args[i] = uint64(args[i].Value)
   174  		} else {
   175  			sentryMsg.args[i] = 0
   176  		}
   177  	}
   178  
   179  	// Notify the syscall thread about a new syscall request.
   180  	atomic.AddUint32(&sentryMsg.state, 1)
   181  	futexWakeUint32(&sentryMsg.state)
   182  
   183  	// Wait for reply.
   184  	//
   185  	// futex waits for sentryMsg.state that isn't changed, so it will
   186  	// returns only only when the other side will call FUTEX_WAKE.
   187  	futexWaitWake(&sentryMsg.state, atomic.LoadUint32(&sentryMsg.state))
   188  
   189  	return uintptr(stubMsg.ret), nil
   190  }