gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/platform/systrap/stub_unsafe.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package systrap

import (
	"math/rand"
	"reflect"
	"unsafe"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/bpf"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/log"
	"gvisor.dev/gvisor/pkg/safecopy"
	"gvisor.dev/gvisor/pkg/sentry/platform/systrap/sysmsg"
)

// initStubProcess is defined in arch-specific assembly.
func initStubProcess()

// addrOfInitStubProcess returns the start address of initStubProcess.
//
// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
// wrapper function rather than the function itself. We must reference from
// assembly to get the ABI0 (i.e., primary) address.
func addrOfInitStubProcess() uintptr
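
// For reference, addrOfInitStubProcess is backed by a small arch-specific
// trampoline that loads the ABI0 symbol address directly in assembly; a
// hedged sketch of the amd64 form (the real definition lives in this
// package's .s files):
//
//	TEXT ·addrOfInitStubProcess(SB), $0-8
//		MOVQ	$initStubProcess(SB), AX
//		MOVQ	AX, ret+0(FP)
//		RET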

// stubCall calls the stub at the given address with the given pid.
func stubCall(addr, pid uintptr)

// unsafeSlice returns a slice for the given address and length.
func unsafeSlice(addr uintptr, length int) (slice []byte) {
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	sh.Data = addr
	sh.Len = length
	sh.Cap = length
	return
}
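
// On Go 1.17+, unsafe.Slice can build the same view without going through
// reflect.SliceHeader; a minimal equivalent sketch (an alternative, not what
// this file uses, and note that unsafe.Slice performs its own runtime
// checks):
//
//	func unsafeSliceAlt(addr uintptr, length int) []byte {
//		return unsafe.Slice((*byte)(unsafe.Pointer(addr)), length)
//	}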

// prepareSeccompRules compiles the stub process's seccomp filters and fills
// in the sock_fprog structures, so that the stub process only needs to call
// the seccomp system call to apply these filters.
//
//go:nosplit
func prepareSeccompRules(stubSysmsgStart,
	stubSysmsgRules, stubSysmsgRulesLen,
	stubSyscallRules, stubSyscallRulesLen uintptr) {
	instrs := sysmsgThreadRules(stubSysmsgStart)
	copySeccompRulesToStub(instrs, stubSysmsgRules, stubSysmsgRulesLen)

	instrs = sysmsgSyscallNotifyRules()
	copySeccompRulesToStub(instrs, stubSyscallRules, stubSyscallRulesLen)
}
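
// For illustration, once a sock_fprog has been written into the stub mapping,
// installing it takes a single system call on the stub side. A hedged,
// Go-flavored sketch of that step (the real stub issues it from its own
// code, not from the sentry):
//
//	fprog := (*linux.SockFprog)(unsafe.Pointer(stubSysmsgRules))
//	unix.RawSyscall(unix.SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER,
//		0 /* flags */, uintptr(unsafe.Pointer(fprog)))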

func copySeccompRulesToStub(instrs []bpf.Instruction, stubAddr, size uintptr) {
	progLen := len(instrs) * int(unsafe.Sizeof(bpf.Instruction{}))
	progPtr := stubAddr + unsafe.Sizeof(linux.SockFprog{})

	if progLen+int(unsafe.Sizeof(linux.SockFprog{})) > int(size) {
		panic("not enough space for sysmsg seccomp rules")
	}

	var targetSlice []bpf.Instruction
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&targetSlice))
	sh.Data = progPtr
	sh.Cap = len(instrs)
	sh.Len = sh.Cap

	copy(targetSlice, instrs)

	// stubSysmsgRules and progPtr are addresses from a stub mapping which
	// is mapped once and never moved, so it is safe to use unsafe.Pointer
	// this way for them.
	sockProg := (*linux.SockFprog)(unsafe.Pointer(stubAddr))
	sockProg.Len = uint16(len(instrs))
	sockProg.Filter = (*linux.BPFInstruction)(unsafe.Pointer(progPtr))
	// Make the seccomp rules stub read-only.
	if _, _, errno := unix.RawSyscall(
		unix.SYS_MPROTECT,
		stubAddr,
		size,
		unix.PROT_READ); errno != 0 {
		panic("mprotect failed: " + errno.Error())
	}
}

// stubInit allocates and initializes the stub memory region, which includes:
//   - the stub code that performs the initial setup of a stub process.
//   - the sysmsg signal handler code that notifies the sentry about new
//     events such as system calls, memory faults, etc.
//   - precompiled seccomp rules to trap application system calls.
//   - reserved space for stub-thread stack regions.
func stubInit() {
	// *--------stubStart-------------------*
	// |--------stubInitProcess-------------|
	// | stub code to init stub processes   |
	// |--------stubSysmsgStart-------------|
	// | sysmsg code                        |
	// |--------stubSysmsgRuleStart---------|
	// | precompiled sysmsg seccomp rules   |
	// |--------guard page------------------|
	// |--------random gap------------------|
	// |                                    |
	// |--------stubSysmsgStack-------------|
	// | Reserved space for per-thread      |
	// | sysmsg stacks.                     |
	// |----------stubContextQueue----------|
	// | Shared ringbuffer queue for stubs  |
	// | to select the next context.        |
	// |--------stubThreadContextRegion-----|
	// | Reserved space for thread contexts |
	// *------------------------------------*

	// Grab the existing stub.
	procStubBegin := addrOfInitStubProcess()
	procStubLen := int(safecopy.FindEndAddress(procStubBegin) - procStubBegin)
	procStubSlice := unsafeSlice(procStubBegin, procStubLen)
	mapLen, _ := hostarch.PageRoundUp(uintptr(procStubLen))
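	// E.g. on a 4KiB-page host, PageRoundUp(0x1201) == 0x2000 (an
	// illustrative length; procStubLen depends on the assembly blob).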

	stubSysmsgStart = mapLen
	stubSysmsgLen := len(sysmsg.SighandlerBlob)
	mapLen, _ = hostarch.PageRoundUp(mapLen + uintptr(stubSysmsgLen))

	stubSysmsgRules = mapLen
	stubSysmsgRulesLen = hostarch.PageSize * 2
	mapLen += stubSysmsgRulesLen
	stubSyscallRules = mapLen
	stubSyscallRulesLen = hostarch.PageSize
	mapLen += stubSyscallRulesLen

	stubROMapEnd = mapLen
	// Add a guard page.
	mapLen += hostarch.PageSize
	stubSysmsgStack = mapLen

	// Allocate space for maxChildThreads stacks plus ONE, because each
	// per-thread stack has to be aligned to sysmsg.PerThreadMemSize.
	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
	mapLen, _ = hostarch.PageRoundUp(mapLen + sysmsg.PerThreadMemSize*(uintptr(maxChildThreads+1)))

	// Allocate the context queue region.
	stubContextQueueRegion = mapLen
	stubContextQueueRegionLen, _ = hostarch.PageRoundUp(unsafe.Sizeof(contextQueue{}))
	mapLen += stubContextQueueRegionLen

	stubSpinningThreadQueueAddr = mapLen
	mapLen += sysmsg.SpinningQueueMemSize

	// Allocate the thread context region.
	stubContextRegion = mapLen
	stubContextRegionLen = sysmsg.AllocatedSizeofThreadContextStruct * (maxGuestContexts + 1)
	mapLen, _ = hostarch.PageRoundUp(mapLen + stubContextRegionLen)

	// Randomize the stubStart address.
	randomOffset := uintptr(rand.Uint64() * hostarch.PageSize)
	maxRandomOffset := maxRandomOffsetOfStubAddress - mapLen
	stubStart = uintptr(0)
	for offset := uintptr(0); offset < maxRandomOffset; offset += hostarch.PageSize {
		stubStart = maxStubUserAddress + (randomOffset+offset)%maxRandomOffset
		// Map the target address for the stub.
		//
		// We don't use MAP_FIXED here because we don't want to unmap
		// something that may have been there already. We just walk
		// down the address space until we find a place where the stub
		// can be placed.
		addr, _, _ := unix.RawSyscall6(
			unix.SYS_MMAP,
			stubStart,
			stubROMapEnd,
			unix.PROT_WRITE|unix.PROT_READ,
			unix.MAP_PRIVATE|unix.MAP_ANONYMOUS,
			0 /* fd */, 0 /* offset */)
		if addr == stubStart {
			break
		}
		if addr != 0 {
			// Unmap the region we've mapped accidentally.
			unix.RawSyscall(unix.SYS_MUNMAP, addr, stubROMapEnd, 0)
		}
		stubStart = uintptr(0)
	}
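
	// On Linux 4.17+, MAP_FIXED_NOREPLACE would express the same intent in a
	// single flag (fail rather than clobber an existing mapping); a hedged
	// sketch of that variant of the probe above:
	//
	//	addr, _, _ := unix.RawSyscall6(unix.SYS_MMAP, stubStart, stubROMapEnd,
	//		unix.PROT_WRITE|unix.PROT_READ,
	//		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_FIXED_NOREPLACE,
	//		0 /* fd */, 0 /* offset */)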

	if stubStart == 0 {
		// This will happen only if we exhaust the entire address
		// space, and it will take a long, long time.
		panic("failed to map stub")
	}
	// Randomize the stubSysmsgStack address.
	gap := uintptr(rand.Uint64()) * hostarch.PageSize % (maximumUserAddress - stubStart - mapLen)
	stubSysmsgStack += uintptr(gap)
	stubContextQueueRegion += uintptr(gap)
	stubContextRegion += uintptr(gap)

	// Copy the stub to the target address.
	targetSlice := unsafeSlice(stubStart, procStubLen)
	copy(targetSlice, procStubSlice)
	stubInitProcess = stubStart

	stubSysmsgStart += stubStart
	stubSysmsgStack += stubStart
	stubROMapEnd += stubStart
	stubContextQueueRegion += stubStart
	stubSpinningThreadQueueAddr += stubStart
	stubContextRegion += stubStart

	// Align stubSysmsgStack to the per-thread stack size.
	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
	if offset := stubSysmsgStack % sysmsg.PerThreadMemSize; offset != 0 {
		stubSysmsgStack += sysmsg.PerThreadMemSize - offset
	}
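	// E.g. for the alignment just performed: with a hypothetical
	// PerThreadMemSize of 0x10000 and a stack base of 0x7f0000012000,
	// offset == 0x2000, so the base moves up by 0xe000 to the aligned
	// address 0x7f0000020000.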
	stubSysmsgRules += stubStart
	stubSyscallRules += stubStart

	targetSlice = unsafeSlice(stubSysmsgStart, stubSysmsgLen)
	copy(targetSlice, sysmsg.SighandlerBlob)

	// Initialize the stub globals.
	p := (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_deep_sleep_timeout)))
	*p = deepSleepTimeout
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_region)))
	*p = uint64(stubContextRegion)
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_stub_start)))
	*p = uint64(stubStart)
	archState := (*sysmsg.ArchState)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_arch_state)))
	archState.Init()
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_queue_addr)))
	*p = uint64(stubContextQueueRegion)
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_spinning_queue_addr)))
	*p = uint64(stubSpinningThreadQueueAddr)
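
	// The writes above all follow one pattern; a hypothetical helper (not
	// part of this package) would read:
	//
	//	setStubGlobal := func(off uintptr, val uint64) {
	//		*(*uint64)(unsafe.Pointer(stubSysmsgStart + off)) = val
	//	}
	//	setStubGlobal(uintptr(sysmsg.Sighandler_blob_offset____export_context_region), uint64(stubContextRegion))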

	prepareSeccompRules(stubSysmsgStart,
		stubSysmsgRules, stubSysmsgRulesLen,
		stubSyscallRules, stubSyscallRulesLen)

	// Make the stub executable.
	if _, _, errno := unix.RawSyscall(
		unix.SYS_MPROTECT,
		stubStart,
		stubROMapEnd-stubStart,
		unix.PROT_EXEC|unix.PROT_READ); errno != 0 {
		panic("mprotect failed: " + errno.Error())
	}

	// Set the end of the stub region.
	stubEnd = stubStart + mapLen + uintptr(gap)
	log.Debugf("stubStart %x stubSysmsgStart %x stubSysmsgStack %x, stubContextQueue %x, stubThreadContextRegion %x, mapLen %x", stubStart, stubSysmsgStart, stubSysmsgStack, stubContextQueueRegion, stubContextRegion, mapLen)
	log.Debugf(archState.String())
}