gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/platform/systrap/stub_unsafe.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package systrap

import (
	"math/rand"
	"reflect"
	"unsafe"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/bpf"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/log"
	"gvisor.dev/gvisor/pkg/safecopy"
	"gvisor.dev/gvisor/pkg/sentry/platform/systrap/sysmsg"
)

// initStubProcess is defined in arch-specific assembly.
func initStubProcess()

// addrOfInitStubProcess returns the start address of initStubProcess.
//
// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
// wrapper function rather than the function itself. We must reference from
// assembly to get the ABI0 (i.e., primary) address.
func addrOfInitStubProcess() uintptr

// stubCall calls the stub at the given address with the given pid.
func stubCall(addr, pid uintptr)

// unsafeSlice returns a byte slice that aliases the given address and length.
func unsafeSlice(addr uintptr, length int) (slice []byte) {
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	sh.Data = addr
	sh.Len = length
	sh.Cap = length
	return
}

// prepareSeccompRules compiles the stub process seccomp filters and fills in
// the sock_fprog structures, so the stub process only needs to call the
// seccomp system call to apply these filters.
//
//go:nosplit
func prepareSeccompRules(stubSysmsgStart,
	stubSysmsgRules, stubSysmsgRulesLen,
	stubSyscallRules, stubSyscallRulesLen uintptr) {
	instrs := sysmsgThreadRules(stubSysmsgStart)
	copySeccompRulesToStub(instrs, stubSysmsgRules, stubSysmsgRulesLen)

	instrs = sysmsgSyscallNotifyRules()
	copySeccompRulesToStub(instrs, stubSyscallRules, stubSyscallRulesLen)
}

// copySeccompRulesToStub writes a sock_fprog header followed by the compiled
// BPF program into the stub region at stubAddr.
func copySeccompRulesToStub(instrs []bpf.Instruction, stubAddr, size uintptr) {
	progLen := len(instrs) * int(unsafe.Sizeof(bpf.Instruction{}))
	progPtr := stubAddr + unsafe.Sizeof(linux.SockFprog{})

	if progLen+int(unsafe.Sizeof(linux.SockFprog{})) > int(size) {
		panic("not enough space for sysmsg seccomp rules")
	}

	var targetSlice []bpf.Instruction
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&targetSlice))
	sh.Data = progPtr
	sh.Cap = len(instrs)
	sh.Len = sh.Cap

	copy(targetSlice, instrs)

	// stubSysmsgRules and progPtr are addresses from a stub mapping which
	// is mapped once and never moved, so it is safe to use unsafe.Pointer
	// this way for them.
	sockProg := (*linux.SockFprog)(unsafe.Pointer(stubAddr))
	sockProg.Len = uint16(len(instrs))
	sockProg.Filter = (*linux.BPFInstruction)(unsafe.Pointer(progPtr))
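	// At this point the stub region at stubAddr contains a sock_fprog
	// header immediately followed by the filter program it points to
	// (illustrative layout; the exact offset is
	// unsafe.Sizeof(linux.SockFprog{})):
	//
	//   stubAddr + 0:                        linux.SockFprog{Len, Filter}
	//   stubAddr + Sizeof(SockFprog{}):      instrs[0] ... instrs[len(instrs)-1]
	//
	// The stub process only has to pass stubAddr to the seccomp syscall to
	// install the filter; it never constructs the program itself.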
	// Make the seccomp rules stub read-only.
	if _, _, errno := unix.RawSyscall(
		unix.SYS_MPROTECT,
		stubAddr,
		size,
		unix.PROT_READ); errno != 0 {
		panic("mprotect failed: " + errno.Error())
	}
}

// stubInit allocates and initializes the stub memory region, which includes:
//   - the stub code that performs the initial setup of a stub process.
//   - the sysmsg signal handler code that notifies the sentry about new
//     events such as system calls, memory faults, etc.
//   - precompiled seccomp rules to trap application system calls.
//   - reserved space for stub-thread stack regions.
func stubInit() {
	// *--------stubStart-------------------*
	// |--------stubInitProcess-------------|
	// | stub code to init stub processes   |
	// |--------stubSysmsgStart-------------|
	// | sysmsg code                        |
	// |--------stubSysmsgRuleStart---------|
	// | precompiled sysmsg seccomp rules   |
	// |--------guard page------------------|
	// |--------random gap------------------|
	// |                                    |
	// |--------stubSysmsgStack-------------|
	// | Reserved space for per-thread      |
	// | sysmsg stacks.                     |
	// |----------stubContextQueue----------|
	// | Shared ringbuffer queue for stubs  |
	// | to select the next context.        |
	// |--------stubThreadContextRegion-----|
	// | Reserved space for thread contexts |
	// *------------------------------------*

	// Grab the existing stub.
	procStubBegin := addrOfInitStubProcess()
	procStubLen := int(safecopy.FindEndAddress(procStubBegin) - procStubBegin)
	procStubSlice := unsafeSlice(procStubBegin, procStubLen)
	mapLen, _ := hostarch.PageRoundUp(uintptr(procStubLen))

	stubSysmsgStart = mapLen
	stubSysmsgLen := len(sysmsg.SighandlerBlob)
	mapLen, _ = hostarch.PageRoundUp(mapLen + uintptr(stubSysmsgLen))

	stubSysmsgRules = mapLen
	stubSysmsgRulesLen = hostarch.PageSize * 2
	mapLen += stubSysmsgRulesLen
	stubSyscallRules = mapLen
	stubSyscallRulesLen = hostarch.PageSize
	mapLen += stubSyscallRulesLen

	stubROMapEnd = mapLen
	// Add a guard page.
	mapLen += hostarch.PageSize
	stubSysmsgStack = mapLen

	// Allocate maxChildThreads plus ONE because each per-thread stack
	// has to be aligned to sysmsg.PerThreadMemSize.
	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
	mapLen, _ = hostarch.PageRoundUp(mapLen + sysmsg.PerThreadMemSize*(uintptr(maxChildThreads+1)))

	// Allocate the context queue region.
	stubContextQueueRegion = mapLen
	stubContextQueueRegionLen, _ = hostarch.PageRoundUp(unsafe.Sizeof(contextQueue{}))
	mapLen += stubContextQueueRegionLen

	stubSpinningThreadQueueAddr = mapLen
	mapLen += sysmsg.SpinningQueueMemSize

	// Allocate the thread context region.
	stubContextRegion = mapLen
	stubContextRegionLen = sysmsg.AllocatedSizeofThreadContextStruct * (maxGuestContexts + 1)
	mapLen, _ = hostarch.PageRoundUp(mapLen + stubContextRegionLen)

	// Randomize the stubStart address.
	randomOffset := uintptr(rand.Uint64() * hostarch.PageSize)
	maxRandomOffset := maxRandomOffsetOfStubAddress - mapLen
	stubStart = uintptr(0)
	for offset := uintptr(0); offset < maxRandomOffset; offset += hostarch.PageSize {
		stubStart = maxStubUserAddress + (randomOffset+offset)%maxRandomOffset
		// Map the target address for the stub.
		//
		// We don't use MAP_FIXED here because we don't want to unmap
		// something that may have been there already. We just walk
		// down the address space until we find a place where the stub
		// can be placed.
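		// Note: without MAP_FIXED the kernel treats stubStart only as a
		// hint and may place the mapping elsewhere, so the returned
		// address is compared against the hint below to detect whether
		// it was honored.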
		addr, _, _ := unix.RawSyscall6(
			unix.SYS_MMAP,
			stubStart,
			stubROMapEnd,
			unix.PROT_WRITE|unix.PROT_READ,
			unix.MAP_PRIVATE|unix.MAP_ANONYMOUS,
			0 /* fd */, 0 /* offset */)
		if addr == stubStart {
			break
		}
		if addr != 0 {
			// Unmap the region we've mapped accidentally.
			unix.RawSyscall(unix.SYS_MUNMAP, addr, stubROMapEnd, 0)
		}
		stubStart = uintptr(0)
	}

	if stubStart == 0 {
		// This will happen only if we exhaust the entire address
		// space, and it will take a long, long time.
		panic("failed to map stub")
	}
	// Randomize the stubSysmsgStack address.
	gap := uintptr(rand.Uint64()) * hostarch.PageSize % (maximumUserAddress - stubStart - mapLen)
	stubSysmsgStack += uintptr(gap)
	stubContextQueueRegion += uintptr(gap)
	stubContextRegion += uintptr(gap)

	// Copy the stub to the target address.
	targetSlice := unsafeSlice(stubStart, procStubLen)
	copy(targetSlice, procStubSlice)
	stubInitProcess = stubStart

	stubSysmsgStart += stubStart
	stubSysmsgStack += stubStart
	stubROMapEnd += stubStart
	stubContextQueueRegion += stubStart
	stubSpinningThreadQueueAddr += stubStart
	stubContextRegion += stubStart

	// Align stubSysmsgStack to the per-thread stack size.
	// Look at sysmsg/sighandler.c:sysmsg_addr() for more details.
	if offset := stubSysmsgStack % sysmsg.PerThreadMemSize; offset != 0 {
		stubSysmsgStack += sysmsg.PerThreadMemSize - offset
	}
	stubSysmsgRules += stubStart
	stubSyscallRules += stubStart

	targetSlice = unsafeSlice(stubSysmsgStart, stubSysmsgLen)
	copy(targetSlice, sysmsg.SighandlerBlob)

	// Initialize the stub globals exported by the sysmsg blob.
	p := (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_deep_sleep_timeout)))
	*p = deepSleepTimeout
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_region)))
	*p = uint64(stubContextRegion)
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_stub_start)))
	*p = uint64(stubStart)
	archState := (*sysmsg.ArchState)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_arch_state)))
	archState.Init()
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_context_queue_addr)))
	*p = uint64(stubContextQueueRegion)
	p = (*uint64)(unsafe.Pointer(stubSysmsgStart + uintptr(sysmsg.Sighandler_blob_offset____export_spinning_queue_addr)))
	*p = uint64(stubSpinningThreadQueueAddr)

	prepareSeccompRules(stubSysmsgStart,
		stubSysmsgRules, stubSysmsgRulesLen,
		stubSyscallRules, stubSyscallRulesLen)

	// Make the stub region executable and read-only.
	if _, _, errno := unix.RawSyscall(
		unix.SYS_MPROTECT,
		stubStart,
		stubROMapEnd-stubStart,
		unix.PROT_EXEC|unix.PROT_READ); errno != 0 {
		panic("mprotect failed: " + errno.Error())
	}

	// Set the end of the stub region.
	stubEnd = stubStart + mapLen + uintptr(gap)
	log.Debugf("stubStart %x, stubSysmsgStart %x, stubSysmsgStack %x, stubContextQueue %x, stubThreadContextRegion %x, mapLen %x", stubStart, stubSysmsgStart, stubSysmsgStack, stubContextQueueRegion, stubContextRegion, mapLen)
	log.Debugf("%s", archState.String())
}