github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/systrap/stub_unsafe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package systrap
    16  
    17  import (
    18  	"math/rand"
    19  	"reflect"
    20  	"unsafe"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/metacubex/gvisor/pkg/abi/linux"
    24  	"github.com/metacubex/gvisor/pkg/bpf"
    25  	"github.com/metacubex/gvisor/pkg/hostarch"
    26  	"github.com/metacubex/gvisor/pkg/log"
    27  	"github.com/metacubex/gvisor/pkg/safecopy"
    28  	"github.com/metacubex/gvisor/pkg/sentry/platform/systrap/sysmsg"
    29  )
    30  
// initStubProcess is defined in arch-specific assembly.
//
// It is declared here without a Go body. stubInit does not call it directly:
// it locates the function's machine code through addrOfInitStubProcess and
// copies that code to the beginning of the stub mapping.
func initStubProcess()
    33  
// addrOfInitStubProcess returns the start address of initStubProcess.
//
// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
// wrapper function rather than the function itself. We must reference from
// assembly to get the ABI0 (i.e., primary) address.
//
// The function is declared without a Go body; stubInit uses the returned
// address as the start of the code range that it copies into the stub
// mapping.
func addrOfInitStubProcess() uintptr
    40  
// stubCall calls the stub at the given address with the given pid.
//
// It is declared without a Go body.
// NOTE(review): presumably implemented in arch-specific assembly like
// initStubProcess — confirm against the accompanying .s files.
func stubCall(addr, pid uintptr)
    43  
    44  // unsafeSlice returns a slice for the given address and length.
    45  func unsafeSlice(addr uintptr, length int) (slice []byte) {
    46  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
    47  	sh.Data = addr
    48  	sh.Len = length
    49  	sh.Cap = length
    50  	return
    51  }
    52  
    53  // prepareSeccompRules compiles stub process seccomp filters and fill
    54  // the sock_fprog structure. So the stub process will only need to call
    55  // seccomp system call to apply these filters.
    56  //
    57  //go:nosplit
    58  func prepareSeccompRules(stubSysmsgStart, stubSysmsgRules, stubSysmsgRulesLen uintptr) {
    59  	instrs := sysmsgThreadRules(stubSysmsgStart)
    60  	progLen := len(instrs) * int(unsafe.Sizeof(bpf.Instruction{}))
    61  	progPtr := stubSysmsgRules + unsafe.Sizeof(linux.SockFprog{})
    62  
    63  	if progLen+int(unsafe.Sizeof(linux.SockFprog{})) > int(stubSysmsgRulesLen) {
    64  		panic("not enough space for sysmsg seccomp rules")
    65  	}
    66  
    67  	var targetSlice []bpf.Instruction
    68  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&targetSlice))
    69  	sh.Data = progPtr
    70  	sh.Cap = len(instrs)
    71  	sh.Len = sh.Cap
    72  
    73  	copy(targetSlice, instrs)
    74  
    75  	// stubSysmsgRules and progPtr are addresses from a stub mapping which
    76  	// is mapped once and never moved, so it is safe to use unsafe.Pointer
    77  	// this way for them.
    78  	sockProg := (*linux.SockFprog)(unsafe.Pointer(stubSysmsgRules))
    79  	sockProg.Len = uint16(len(instrs))
    80  	sockProg.Filter = (*linux.BPFInstruction)(unsafe.Pointer(progPtr))
    81  
    82  	// Make the seccomp rules stub read-only.
    83  	if _, _, errno := unix.RawSyscall(
    84  		unix.SYS_MPROTECT,
    85  		stubSysmsgRules,
    86  		stubSysmsgRulesLen,
    87  		unix.PROT_READ); errno != 0 {
    88  		panic("mprotect failed: " + errno.Error())
    89  	}
    90  }
    91  
    92  // stubInit allocates and  initializes the stub memory region which includes:
    93  //   - the stub code to do initial initialization of a stub process.
    94  //   - the sysmsg signal handler code to notify sentry about new events such as
    95  //     system calls, memory faults, etc.
    96  //   - precompiled seccomp rules to trap application system calls.
    97  //   - reserved space for stub-thread stack regions.
    98  func stubInit() {
    99  	// *--------stubStart-------------------*
   100  	// |--------stubInitProcess-------------|
   101  	// | stub code to init stub processes   |
   102  	// |--------stubSysmsgStart-------------|
   103  	// | sysmsg code                        |
   104  	// |--------stubSysmsgRuleStart---------|
   105  	// | precompiled sysmsg seccomp rules   |
   106  	// |--------guard page------------------|
   107  	// |--------random gap------------------|
   108  	// |                                    |
   109  	// |--------stubSysmsgStack-------------|
   110  	// | Reserved space for per-thread      |
   111  	// | sysmsg stacks.                     |
   112  	// |----------stubContextQueue----------|
   113  	// | Shared ringbuffer queue for stubs  |
   114  	// | to select the next context.        |
   115  	// |--------stubThreadContextRegion-----|
   116  	// | Reserved space for thread contexts |
   117  	// *------------------------------------*
   118  
   119  	// Grab the existing stub.
   120  	procStubBegin := addrOfInitStubProcess()
   121  	procStubLen := int(safecopy.FindEndAddress(procStubBegin) - procStubBegin)
   122  	procStubSlice := unsafeSlice(procStubBegin, procStubLen)
   123  	mapLen, _ := hostarch.PageRoundUp(uintptr(procStubLen))
   124  
   125  	stubSysmsgStart = mapLen
   126  	stubSysmsgLen := len(sysmsg.SighandlerBlob)
   127  	mapLen, _ = hostarch.PageRoundUp(mapLen + uintptr(stubSysmsgLen))
   128  
   129  	stubSysmsgRules = mapLen
   130  	stubSysmsgRulesLen = hostarch.PageSize * 4
   131  	mapLen += stubSysmsgRulesLen
   132  
   133  	stubROMapEnd = mapLen
   134  	// Add a guard page.
   135  	mapLen += hostarch.PageSize
   136  	stubSysmsgStack = mapLen
   137  
   138  	// Allocate maxGuestThreads plus ONE because each per-thread stack
   139  	// has to be aligned to sysmsg.PerThreadMemSize.
   140  	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
   141  	mapLen, _ = hostarch.PageRoundUp(mapLen + sysmsg.PerThreadMemSize*(maxSystemThreads+1))
   142  
   143  	// Allocate context queue region
   144  	stubContextQueueRegion = mapLen
   145  	stubContextQueueRegionLen, _ = hostarch.PageRoundUp(unsafe.Sizeof(contextQueue{}))
   146  	mapLen += stubContextQueueRegionLen
   147  
   148  	stubSpinningThreadQueueAddr = mapLen
   149  	mapLen += sysmsg.SpinningQueueMemSize
   150  
   151  	// Allocate thread context region
   152  	stubContextRegion = mapLen
   153  	stubContextRegionLen = sysmsg.AllocatedSizeofThreadContextStruct * (maxGuestContexts + 1)
   154  	mapLen, _ = hostarch.PageRoundUp(mapLen + stubContextRegionLen)
   155  
   156  	// Randomize stubStart address.
   157  	randomOffset := uintptr(rand.Uint64() * hostarch.PageSize)
   158  	maxRandomOffset := maxRandomOffsetOfStubAddress - mapLen
   159  	stubStart = uintptr(0)
   160  	for offset := uintptr(0); offset < maxRandomOffset; offset += hostarch.PageSize {
   161  		stubStart = maxStubUserAddress + (randomOffset+offset)%maxRandomOffset
   162  		// Map the target address for the stub.
   163  		//
   164  		// We don't use FIXED here because we don't want to unmap
   165  		// something that may have been there already. We just walk
   166  		// down the address space until we find a place where the stub
   167  		// can be placed.
   168  		addr, _, _ := unix.RawSyscall6(
   169  			unix.SYS_MMAP,
   170  			stubStart,
   171  			stubROMapEnd,
   172  			unix.PROT_WRITE|unix.PROT_READ,
   173  			unix.MAP_PRIVATE|unix.MAP_ANONYMOUS,
   174  			0 /* fd */, 0 /* offset */)
   175  		if addr == stubStart {
   176  			break
   177  		}
   178  		if addr != 0 {
   179  			// Unmap the region we've mapped accidentally.
   180  			unix.RawSyscall(unix.SYS_MUNMAP, addr, stubROMapEnd, 0)
   181  		}
   182  		stubStart = uintptr(0)
   183  	}
   184  
   185  	if stubStart == 0 {
   186  		// This will happen only if we exhaust the entire address
   187  		// space, and it will take a long, long time.
   188  		panic("failed to map stub")
   189  	}
   190  	// Randomize stubSysmsgStack address.
   191  	gap := uintptr(rand.Uint64()) * hostarch.PageSize % (maximumUserAddress - stubStart - mapLen)
   192  	stubSysmsgStack += uintptr(gap)
   193  	stubContextQueueRegion += uintptr(gap)
   194  	stubContextRegion += uintptr(gap)
   195  
   196  	// Copy the stub to the address.
   197  	targetSlice := unsafeSlice(stubStart, procStubLen)
   198  	copy(targetSlice, procStubSlice)
   199  	stubInitProcess = stubStart
   200  
   201  	stubSysmsgStart += stubStart
   202  	stubSysmsgStack += stubStart
   203  	stubROMapEnd += stubStart
   204  	stubContextQueueRegion += stubStart
   205  	stubSpinningThreadQueueAddr += stubStart
   206  	stubContextRegion += stubStart
   207  
   208  	// Align stubSysmsgStack to the per-thread stack size.
   209  	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
   210  	if offset := stubSysmsgStack % sysmsg.PerThreadMemSize; offset != 0 {
   211  		stubSysmsgStack += sysmsg.PerThreadMemSize - offset
   212  	}
   213  	stubSysmsgRules += stubStart
   214  
   215  	targetSlice = unsafeSlice(stubSysmsgStart, stubSysmsgLen)
   216  	copy(targetSlice, sysmsg.SighandlerBlob)
   217  
   218  	// Initialize stub globals
   219  	p := (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_deep_sleep_timeout)))
   220  	*p = deepSleepTimeout
   221  	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_region)))
   222  	*p = uint64(stubContextRegion)
   223  	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_stub_start)))
   224  	*p = uint64(stubStart)
   225  	archState := (*sysmsg.ArchState)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_arch_state)))
   226  	archState.Init()
   227  	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_queue_addr)))
   228  	*p = uint64(stubContextQueueRegion)
   229  	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_spinning_queue_addr)))
   230  	*p = uint64(stubSpinningThreadQueueAddr)
   231  
   232  	prepareSeccompRules(stubSysmsgStart, stubSysmsgRules, stubSysmsgRulesLen)
   233  
   234  	// Make the stub executable.
   235  	if _, _, errno := unix.RawSyscall(
   236  		unix.SYS_MPROTECT,
   237  		stubStart,
   238  		stubROMapEnd-stubStart,
   239  		unix.PROT_EXEC|unix.PROT_READ); errno != 0 {
   240  		panic("mprotect failed: " + errno.Error())
   241  	}
   242  
   243  	// Set the end.
   244  	stubEnd = stubStart + mapLen + uintptr(gap)
   245  	log.Debugf("stubStart %x stubSysmsgStart %x stubSysmsgStack %x, stubContextQueue %x, stubThreadContextRegion %x, mapLen %x", stubStart, stubSysmsgStart, stubSysmsgStack, stubContextQueueRegion, stubContextRegion, mapLen)
   246  	log.Debugf(archState.String())
   247  }