github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/ptrace/subprocess_amd64.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build amd64 16 17 package ptrace 18 19 import ( 20 "fmt" 21 "strings" 22 23 "golang.org/x/sys/unix" 24 "github.com/SagerNet/gvisor/pkg/abi/linux" 25 "github.com/SagerNet/gvisor/pkg/seccomp" 26 "github.com/SagerNet/gvisor/pkg/sentry/arch" 27 ) 28 29 const ( 30 // maximumUserAddress is the largest possible user address. 31 maximumUserAddress = 0x7ffffffff000 32 33 // stubInitAddress is the initial attempt link address for the stub. 34 stubInitAddress = 0x7fffffff0000 35 36 // initRegsRipAdjustment is the size of the syscall instruction. 37 initRegsRipAdjustment = 2 38 ) 39 40 // resetSysemuRegs sets up emulation registers. 41 // 42 // This should be called prior to calling sysemu. 43 func (t *thread) resetSysemuRegs(regs *arch.Registers) { 44 regs.Cs = t.initRegs.Cs 45 regs.Ss = t.initRegs.Ss 46 regs.Ds = t.initRegs.Ds 47 regs.Es = t.initRegs.Es 48 regs.Fs = t.initRegs.Fs 49 regs.Gs = t.initRegs.Gs 50 } 51 52 // createSyscallRegs sets up syscall registers. 53 // 54 // This should be called to generate registers for a system call. 55 func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers { 56 // Copy initial registers. 57 regs := *initRegs 58 59 // Set our syscall number. 60 regs.Rax = uint64(sysno) 61 if len(args) >= 1 { 62 regs.Rdi = args[0].Uint64() 63 } 64 if len(args) >= 2 { 65 regs.Rsi = args[1].Uint64() 66 } 67 if len(args) >= 3 { 68 regs.Rdx = args[2].Uint64() 69 } 70 if len(args) >= 4 { 71 regs.R10 = args[3].Uint64() 72 } 73 if len(args) >= 5 { 74 regs.R8 = args[4].Uint64() 75 } 76 if len(args) >= 6 { 77 regs.R9 = args[5].Uint64() 78 } 79 80 return regs 81 } 82 83 // isSingleStepping determines if the registers indicate single-stepping. 84 func isSingleStepping(regs *arch.Registers) bool { 85 return (regs.Eflags & arch.X86TrapFlag) != 0 86 } 87 88 // updateSyscallRegs updates registers after finishing sysemu. 89 func updateSyscallRegs(regs *arch.Registers) { 90 // Ptrace puts -ENOSYS in rax on syscall-enter-stop. 91 regs.Rax = regs.Orig_rax 92 } 93 94 // syscallReturnValue extracts a sensible return from registers. 95 func syscallReturnValue(regs *arch.Registers) (uintptr, error) { 96 rval := int64(regs.Rax) 97 if rval < 0 { 98 return 0, unix.Errno(-rval) 99 } 100 return uintptr(rval), nil 101 } 102 103 func dumpRegs(regs *arch.Registers) string { 104 var m strings.Builder 105 106 fmt.Fprintf(&m, "Registers:\n") 107 fmt.Fprintf(&m, "\tR15\t = %016x\n", regs.R15) 108 fmt.Fprintf(&m, "\tR14\t = %016x\n", regs.R14) 109 fmt.Fprintf(&m, "\tR13\t = %016x\n", regs.R13) 110 fmt.Fprintf(&m, "\tR12\t = %016x\n", regs.R12) 111 fmt.Fprintf(&m, "\tRbp\t = %016x\n", regs.Rbp) 112 fmt.Fprintf(&m, "\tRbx\t = %016x\n", regs.Rbx) 113 fmt.Fprintf(&m, "\tR11\t = %016x\n", regs.R11) 114 fmt.Fprintf(&m, "\tR10\t = %016x\n", regs.R10) 115 fmt.Fprintf(&m, "\tR9\t = %016x\n", regs.R9) 116 fmt.Fprintf(&m, "\tR8\t = %016x\n", regs.R8) 117 fmt.Fprintf(&m, "\tRax\t = %016x\n", regs.Rax) 118 fmt.Fprintf(&m, "\tRcx\t = %016x\n", regs.Rcx) 119 fmt.Fprintf(&m, "\tRdx\t = %016x\n", regs.Rdx) 120 fmt.Fprintf(&m, "\tRsi\t = %016x\n", regs.Rsi) 121 fmt.Fprintf(&m, "\tRdi\t = %016x\n", regs.Rdi) 122 fmt.Fprintf(&m, "\tOrig_rax = %016x\n", regs.Orig_rax) 123 fmt.Fprintf(&m, "\tRip\t = %016x\n", regs.Rip) 124 fmt.Fprintf(&m, "\tCs\t = %016x\n", regs.Cs) 125 fmt.Fprintf(&m, "\tEflags\t = %016x\n", regs.Eflags) 126 fmt.Fprintf(&m, "\tRsp\t = %016x\n", regs.Rsp) 127 fmt.Fprintf(&m, "\tSs\t = %016x\n", regs.Ss) 128 fmt.Fprintf(&m, "\tFs_base\t = %016x\n", regs.Fs_base) 129 fmt.Fprintf(&m, "\tGs_base\t = %016x\n", regs.Gs_base) 130 fmt.Fprintf(&m, "\tDs\t = %016x\n", regs.Ds) 131 fmt.Fprintf(&m, "\tEs\t = %016x\n", regs.Es) 132 fmt.Fprintf(&m, "\tFs\t = %016x\n", regs.Fs) 133 fmt.Fprintf(&m, "\tGs\t = %016x\n", regs.Gs) 134 135 return m.String() 136 } 137 138 // adjustInitregsRip adjust the current register RIP value to 139 // be just before the system call instruction excution 140 func (t *thread) adjustInitRegsRip() { 141 t.initRegs.Rip -= initRegsRipAdjustment 142 } 143 144 // Pass the expected PPID to the child via R15 when creating stub process. 145 func initChildProcessPPID(initregs *arch.Registers, ppid int32) { 146 initregs.R15 = uint64(ppid) 147 // Rbx has to be set to 1 when creating stub process. 148 initregs.Rbx = 1 149 } 150 151 // patchSignalInfo patches the signal info to account for hitting the seccomp 152 // filters from vsyscall emulation, specified below. We allow for SIGSYS as a 153 // synchronous trap, but patch the structure to appear like a SIGSEGV with the 154 // Rip as the faulting address. 155 // 156 // Note that this should only be called after verifying that the signalInfo has 157 // been generated by the kernel. 158 func patchSignalInfo(regs *arch.Registers, signalInfo *linux.SignalInfo) { 159 if linux.Signal(signalInfo.Signo) == linux.SIGSYS { 160 signalInfo.Signo = int32(linux.SIGSEGV) 161 162 // Unwind the kernel emulation, if any has occurred. A SIGSYS is delivered 163 // with the si_call_addr field pointing to the current RIP. This field 164 // aligns with the si_addr field for a SIGSEGV, so we don't need to touch 165 // anything there. We do need to unwind emulation however, so we set the 166 // instruction pointer to the faulting value, and "unpop" the stack. 167 regs.Rip = signalInfo.Addr() 168 regs.Rsp -= 8 169 } 170 } 171 172 // enableCpuidFault enables cpuid-faulting. 173 // 174 // This may fail on older kernels or hardware, so we just disregard the result. 175 // Host CPUID will be enabled. 176 // 177 // This is safe to call in an afterFork context. 178 // 179 //go:nosplit 180 func enableCpuidFault() { 181 unix.RawSyscall6(unix.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0) 182 } 183 184 // appendArchSeccompRules append architecture specific seccomp rules when creating BPF program. 185 // Ref attachedThread() for more detail. 186 func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet { 187 rules = append(rules, 188 // Rules for trapping vsyscall access. 189 seccomp.RuleSet{ 190 Rules: seccomp.SyscallRules{ 191 unix.SYS_GETTIMEOFDAY: {}, 192 unix.SYS_TIME: {}, 193 unix.SYS_GETCPU: {}, // SYS_GETCPU was not defined in package syscall on amd64. 194 }, 195 Action: linux.SECCOMP_RET_TRAP, 196 Vsyscall: true, 197 }) 198 if defaultAction != linux.SECCOMP_RET_ALLOW { 199 rules = append(rules, 200 seccomp.RuleSet{ 201 Rules: seccomp.SyscallRules{ 202 unix.SYS_ARCH_PRCTL: []seccomp.Rule{ 203 {seccomp.EqualTo(linux.ARCH_SET_CPUID), seccomp.EqualTo(0)}, 204 }, 205 }, 206 Action: linux.SECCOMP_RET_ALLOW, 207 }) 208 } 209 return rules 210 } 211 212 // probeSeccomp returns true iff seccomp is run after ptrace notifications, 213 // which is generally the case for kernel version >= 4.8. This check is dynamic 214 // because kernels have be backported behavior. 215 // 216 // See createStub for more information. 217 // 218 // Precondition: the runtime OS thread must be locked. 219 func probeSeccomp() bool { 220 // Create a completely new, destroyable process. 221 t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO) 222 if err != nil { 223 panic(fmt.Sprintf("seccomp probe failed: %v", err)) 224 } 225 defer t.destroy() 226 227 // Set registers to the yield system call. This call is not allowed 228 // by the filters specified in the attachThread function. 229 regs := createSyscallRegs(&t.initRegs, unix.SYS_SCHED_YIELD) 230 if err := t.setRegs(®s); err != nil { 231 panic(fmt.Sprintf("ptrace set regs failed: %v", err)) 232 } 233 234 for { 235 // Attempt an emulation. 236 if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 { 237 panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno)) 238 } 239 240 sig := t.wait(stopped) 241 if sig == (syscallEvent | unix.SIGTRAP) { 242 // Did the seccomp errno hook already run? This would 243 // indicate that seccomp is first in line and we're 244 // less than 4.8. 245 if err := t.getRegs(®s); err != nil { 246 panic(fmt.Sprintf("ptrace get-regs failed: %v", err)) 247 } 248 if _, err := syscallReturnValue(®s); err == nil { 249 // The seccomp errno mode ran first, and reset 250 // the error in the registers. 251 return false 252 } 253 // The seccomp hook did not run yet, and therefore it 254 // is safe to use RET_KILL mode for dispatched calls. 255 return true 256 } 257 } 258 } 259 260 func (s *subprocess) arm64SyscallWorkaround(t *thread, regs *arch.Registers) { 261 }