github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/systrap/syscall_thread.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package systrap 16 17 import ( 18 "fmt" 19 "sync/atomic" 20 21 "github.com/MerlinKodo/gvisor/pkg/hostarch" 22 "github.com/MerlinKodo/gvisor/pkg/sentry/arch" 23 "github.com/MerlinKodo/gvisor/pkg/sentry/memmap" 24 "github.com/MerlinKodo/gvisor/pkg/sentry/pgalloc" 25 "github.com/MerlinKodo/gvisor/pkg/sentry/platform/systrap/sysmsg" 26 "github.com/MerlinKodo/gvisor/pkg/sentry/usage" 27 "golang.org/x/sys/unix" 28 ) 29 30 // The syscall message consists of sentry and stub messages. 31 const syscallThreadMessageSize = hostarch.PageSize * 2 32 33 // syscallThread implements the process of calling syscalls in a stub process. 34 // 35 // Each syscall thread owns a shared memory region to communicate with the 36 // Sentry. This region consists of two pages. The first page called 37 // sentryMessage is mapped as read-only in the stub address space. The second 38 // page called stubMessage is mapped as read-write in the stub process. 39 // 40 // Any memory regions that are mapped as read-write in a stub address space can 41 // be changed from a user code. This means that we can't trust the content of 42 // stubMessage, but it is used to receive a syscall return code. Therefore 43 // syscallThread can be used only in these cases: 44 // - If a system call never fails (e.g munmap). 45 // - If a system call has to return only one know value or if it fails, 46 // it doesn't not reveal any data (e.g. mmap). 47 type syscallThread struct { 48 // subproc is a link to the subprocess which is used to call native 49 // system calls and track when a sysmsg thread has to be recreated. 50 // Look at getSysmsgThread() for more details. 51 subproc *subprocess 52 53 // thread is a thread identifier. 54 thread *thread 55 56 // stackRange is the range for the sentry syscall message in the memory 57 // file. 58 stackRange memmap.FileRange 59 60 // sentryAddr is the address of the shared memory region in the Sentry 61 // address space. 62 sentryAddr uintptr 63 // stubAddr is the address of the shared memory region in the stub 64 // address space. 65 stubAddr uintptr 66 67 // sentryMessage is the first page of the share message that can't be 68 // modified by the stub thread. 69 sentryMessage *syscallSentryMessage 70 // stubMessage is the second page of the shared message that can be 71 // modified by the stub thread. 72 stubMessage *syscallStubMessage 73 } 74 75 func (t *syscallThread) init() error { 76 // Allocate a new shared memory message. 77 opts := pgalloc.AllocOpts{ 78 Kind: usage.System, 79 Dir: pgalloc.TopDown, 80 } 81 fr, err := t.subproc.memoryFile.Allocate(syscallThreadMessageSize, opts) 82 if err != nil { 83 return err 84 } 85 86 t.stackRange = fr 87 t.stubAddr = stubSysmsgStack + sysmsg.PerThreadMemSize*uintptr(t.thread.sysmsgStackID) 88 err = t.mapMessageIntoStub() 89 if err != nil { 90 t.destroy() 91 return err 92 } 93 94 // Map the stack into the sentry. 95 sentryAddr, _, errno := unix.RawSyscall6( 96 unix.SYS_MMAP, 97 0, 98 syscallThreadMessageSize, 99 unix.PROT_WRITE|unix.PROT_READ, 100 unix.MAP_SHARED|unix.MAP_FILE, 101 uintptr(t.subproc.memoryFile.FD()), uintptr(fr.Start)) 102 if errno != 0 { 103 t.destroy() 104 return fmt.Errorf("mmap failed: %v", errno) 105 } 106 t.sentryAddr = sentryAddr 107 108 t.initRequestReplyAddresses(sentryAddr) 109 return nil 110 } 111 112 func (t *syscallThread) destroy() { 113 if t.sentryAddr != 0 { 114 _, _, errno := unix.RawSyscall6( 115 unix.SYS_MUNMAP, 116 t.sentryAddr, 117 syscallThreadMessageSize, 118 0, 0, 0, 0) 119 if errno != 0 { 120 panic(fmt.Sprintf("mumap failed: %v", errno)) 121 } 122 } 123 if t.stubAddr != 0 { 124 _, err := t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MUNMAP, 125 arch.SyscallArgument{Value: t.stubAddr}, 126 arch.SyscallArgument{Value: uintptr(syscallThreadMessageSize)}) 127 if err != nil { 128 panic(fmt.Sprintf("munmap failed: %v", err)) 129 } 130 } 131 t.subproc.memoryFile.DecRef(t.stackRange) 132 t.subproc.sysmsgStackPool.Put(t.thread.sysmsgStackID) 133 } 134 135 // mapMessageIntoStub maps the syscall message into the stub process address space. 136 func (t *syscallThread) mapMessageIntoStub() error { 137 // Map sentryMessage as read-only. 138 _, err := t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MMAP, 139 arch.SyscallArgument{Value: t.stubAddr}, 140 arch.SyscallArgument{Value: uintptr(hostarch.PageSize)}, 141 arch.SyscallArgument{Value: uintptr(unix.PROT_READ)}, 142 arch.SyscallArgument{Value: unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED}, 143 arch.SyscallArgument{Value: uintptr(t.subproc.memoryFile.FD())}, 144 arch.SyscallArgument{Value: uintptr(t.stackRange.Start)}) 145 if err != nil { 146 return err 147 } 148 // Map stubMessage as read-write. 149 _, err = t.thread.syscallIgnoreInterrupt(&t.thread.initRegs, unix.SYS_MMAP, 150 arch.SyscallArgument{Value: t.stubAddr + syscallStubMessageOffset}, 151 arch.SyscallArgument{Value: uintptr(hostarch.PageSize)}, 152 arch.SyscallArgument{Value: uintptr(unix.PROT_READ | unix.PROT_WRITE)}, 153 arch.SyscallArgument{Value: unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED}, 154 arch.SyscallArgument{Value: uintptr(t.subproc.memoryFile.FD())}, 155 arch.SyscallArgument{Value: uintptr(t.stackRange.Start + hostarch.PageSize)}) 156 return err 157 } 158 159 // attach attaches to the stub thread with ptrace and unlock signals. 160 func (t *syscallThread) attach() { 161 t.thread.attach() 162 // We need to unblock signals, because the TRAP signal is used to run 163 // syscalls via ptrace. 164 t.unmaskAllSignalsAttached() 165 } 166 167 func (t *syscallThread) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintptr, error) { 168 sentryMsg := t.sentryMessage 169 stubMsg := t.stubMessage 170 sentryMsg.sysno = uint64(sysno) 171 for i := 0; i < len(sentryMsg.args); i++ { 172 if i < len(args) { 173 sentryMsg.args[i] = uint64(args[i].Value) 174 } else { 175 sentryMsg.args[i] = 0 176 } 177 } 178 179 // Notify the syscall thread about a new syscall request. 180 atomic.AddUint32(&sentryMsg.state, 1) 181 futexWakeUint32(&sentryMsg.state) 182 183 // Wait for reply. 184 // 185 // futex waits for sentryMsg.state that isn't changed, so it will 186 // returns only only when the other side will call FUTEX_WAKE. 187 futexWaitWake(&sentryMsg.state, atomic.LoadUint32(&sentryMsg.state)) 188 189 return uintptr(stubMsg.ret), nil 190 }