github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/ptrace/subprocess_amd64.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build amd64
    16  
    17  package ptrace
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/seccomp"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    27  )
    28  
const (
	// maximumUserAddress is the largest possible user address.
	maximumUserAddress = 0x7ffffffff000

	// stubInitAddress is the address at which linking the stub is first
	// attempted.
	stubInitAddress = 0x7fffffff0000

	// initRegsRipAdjustment is the size of the syscall instruction. It is
	// the amount that adjustInitRegsRip subtracts from the saved RIP.
	initRegsRipAdjustment = 2
)
    39  
    40  // resetSysemuRegs sets up emulation registers.
    41  //
    42  // This should be called prior to calling sysemu.
    43  func (t *thread) resetSysemuRegs(regs *arch.Registers) {
    44  	regs.Cs = t.initRegs.Cs
    45  	regs.Ss = t.initRegs.Ss
    46  	regs.Ds = t.initRegs.Ds
    47  	regs.Es = t.initRegs.Es
    48  	regs.Fs = t.initRegs.Fs
    49  	regs.Gs = t.initRegs.Gs
    50  }
    51  
    52  // createSyscallRegs sets up syscall registers.
    53  //
    54  // This should be called to generate registers for a system call.
    55  func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers {
    56  	// Copy initial registers.
    57  	regs := *initRegs
    58  
    59  	// Set our syscall number.
    60  	regs.Rax = uint64(sysno)
    61  	if len(args) >= 1 {
    62  		regs.Rdi = args[0].Uint64()
    63  	}
    64  	if len(args) >= 2 {
    65  		regs.Rsi = args[1].Uint64()
    66  	}
    67  	if len(args) >= 3 {
    68  		regs.Rdx = args[2].Uint64()
    69  	}
    70  	if len(args) >= 4 {
    71  		regs.R10 = args[3].Uint64()
    72  	}
    73  	if len(args) >= 5 {
    74  		regs.R8 = args[4].Uint64()
    75  	}
    76  	if len(args) >= 6 {
    77  		regs.R9 = args[5].Uint64()
    78  	}
    79  
    80  	return regs
    81  }
    82  
    83  // isSingleStepping determines if the registers indicate single-stepping.
    84  func isSingleStepping(regs *arch.Registers) bool {
    85  	return (regs.Eflags & arch.X86TrapFlag) != 0
    86  }
    87  
    88  // updateSyscallRegs updates registers after finishing sysemu.
    89  func updateSyscallRegs(regs *arch.Registers) {
    90  	// Ptrace puts -ENOSYS in rax on syscall-enter-stop.
    91  	regs.Rax = regs.Orig_rax
    92  }
    93  
    94  // syscallReturnValue extracts a sensible return from registers.
    95  func syscallReturnValue(regs *arch.Registers) (uintptr, error) {
    96  	rval := int64(regs.Rax)
    97  	if rval < 0 {
    98  		return 0, unix.Errno(-rval)
    99  	}
   100  	return uintptr(rval), nil
   101  }
   102  
   103  func dumpRegs(regs *arch.Registers) string {
   104  	var m strings.Builder
   105  
   106  	fmt.Fprintf(&m, "Registers:\n")
   107  	fmt.Fprintf(&m, "\tR15\t = %016x\n", regs.R15)
   108  	fmt.Fprintf(&m, "\tR14\t = %016x\n", regs.R14)
   109  	fmt.Fprintf(&m, "\tR13\t = %016x\n", regs.R13)
   110  	fmt.Fprintf(&m, "\tR12\t = %016x\n", regs.R12)
   111  	fmt.Fprintf(&m, "\tRbp\t = %016x\n", regs.Rbp)
   112  	fmt.Fprintf(&m, "\tRbx\t = %016x\n", regs.Rbx)
   113  	fmt.Fprintf(&m, "\tR11\t = %016x\n", regs.R11)
   114  	fmt.Fprintf(&m, "\tR10\t = %016x\n", regs.R10)
   115  	fmt.Fprintf(&m, "\tR9\t = %016x\n", regs.R9)
   116  	fmt.Fprintf(&m, "\tR8\t = %016x\n", regs.R8)
   117  	fmt.Fprintf(&m, "\tRax\t = %016x\n", regs.Rax)
   118  	fmt.Fprintf(&m, "\tRcx\t = %016x\n", regs.Rcx)
   119  	fmt.Fprintf(&m, "\tRdx\t = %016x\n", regs.Rdx)
   120  	fmt.Fprintf(&m, "\tRsi\t = %016x\n", regs.Rsi)
   121  	fmt.Fprintf(&m, "\tRdi\t = %016x\n", regs.Rdi)
   122  	fmt.Fprintf(&m, "\tOrig_rax = %016x\n", regs.Orig_rax)
   123  	fmt.Fprintf(&m, "\tRip\t = %016x\n", regs.Rip)
   124  	fmt.Fprintf(&m, "\tCs\t = %016x\n", regs.Cs)
   125  	fmt.Fprintf(&m, "\tEflags\t = %016x\n", regs.Eflags)
   126  	fmt.Fprintf(&m, "\tRsp\t = %016x\n", regs.Rsp)
   127  	fmt.Fprintf(&m, "\tSs\t = %016x\n", regs.Ss)
   128  	fmt.Fprintf(&m, "\tFs_base\t = %016x\n", regs.Fs_base)
   129  	fmt.Fprintf(&m, "\tGs_base\t = %016x\n", regs.Gs_base)
   130  	fmt.Fprintf(&m, "\tDs\t = %016x\n", regs.Ds)
   131  	fmt.Fprintf(&m, "\tEs\t = %016x\n", regs.Es)
   132  	fmt.Fprintf(&m, "\tFs\t = %016x\n", regs.Fs)
   133  	fmt.Fprintf(&m, "\tGs\t = %016x\n", regs.Gs)
   134  
   135  	return m.String()
   136  }
   137  
   138  // adjustInitregsRip adjust the current register RIP value to
   139  // be just before the system call instruction excution
   140  func (t *thread) adjustInitRegsRip() {
   141  	t.initRegs.Rip -= initRegsRipAdjustment
   142  }
   143  
   144  // Pass the expected PPID to the child via R15 when creating stub process.
   145  func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
   146  	initregs.R15 = uint64(ppid)
   147  	// Rbx has to be set to 1 when creating stub process.
   148  	initregs.Rbx = 1
   149  }
   150  
   151  // patchSignalInfo patches the signal info to account for hitting the seccomp
   152  // filters from vsyscall emulation, specified below. We allow for SIGSYS as a
   153  // synchronous trap, but patch the structure to appear like a SIGSEGV with the
   154  // Rip as the faulting address.
   155  //
   156  // Note that this should only be called after verifying that the signalInfo has
   157  // been generated by the kernel.
   158  func patchSignalInfo(regs *arch.Registers, signalInfo *linux.SignalInfo) {
   159  	if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
   160  		signalInfo.Signo = int32(linux.SIGSEGV)
   161  
   162  		// Unwind the kernel emulation, if any has occurred. A SIGSYS is delivered
   163  		// with the si_call_addr field pointing to the current RIP. This field
   164  		// aligns with the si_addr field for a SIGSEGV, so we don't need to touch
   165  		// anything there. We do need to unwind emulation however, so we set the
   166  		// instruction pointer to the faulting value, and "unpop" the stack.
   167  		regs.Rip = signalInfo.Addr()
   168  		regs.Rsp -= 8
   169  	}
   170  }
   171  
   172  // enableCpuidFault enables cpuid-faulting.
   173  //
   174  // This may fail on older kernels or hardware, so we just disregard the result.
   175  // Host CPUID will be enabled.
   176  //
   177  // This is safe to call in an afterFork context.
   178  //
   179  //go:nosplit
   180  func enableCpuidFault() {
   181  	unix.RawSyscall6(unix.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0)
   182  }
   183  
   184  // appendArchSeccompRules append architecture specific seccomp rules when creating BPF program.
   185  // Ref attachedThread() for more detail.
   186  func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet {
   187  	rules = append(rules,
   188  		// Rules for trapping vsyscall access.
   189  		seccomp.RuleSet{
   190  			Rules: seccomp.SyscallRules{
   191  				unix.SYS_GETTIMEOFDAY: {},
   192  				unix.SYS_TIME:         {},
   193  				unix.SYS_GETCPU:       {}, // SYS_GETCPU was not defined in package syscall on amd64.
   194  			},
   195  			Action:   linux.SECCOMP_RET_TRAP,
   196  			Vsyscall: true,
   197  		})
   198  	if defaultAction != linux.SECCOMP_RET_ALLOW {
   199  		rules = append(rules,
   200  			seccomp.RuleSet{
   201  				Rules: seccomp.SyscallRules{
   202  					unix.SYS_ARCH_PRCTL: []seccomp.Rule{
   203  						{seccomp.EqualTo(linux.ARCH_SET_CPUID), seccomp.EqualTo(0)},
   204  					},
   205  				},
   206  				Action: linux.SECCOMP_RET_ALLOW,
   207  			})
   208  	}
   209  	return rules
   210  }
   211  
   212  // probeSeccomp returns true iff seccomp is run after ptrace notifications,
   213  // which is generally the case for kernel version >= 4.8. This check is dynamic
   214  // because kernels have be backported behavior.
   215  //
   216  // See createStub for more information.
   217  //
   218  // Precondition: the runtime OS thread must be locked.
   219  func probeSeccomp() bool {
   220  	// Create a completely new, destroyable process.
   221  	t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO)
   222  	if err != nil {
   223  		panic(fmt.Sprintf("seccomp probe failed: %v", err))
   224  	}
   225  	defer t.destroy()
   226  
   227  	// Set registers to the yield system call. This call is not allowed
   228  	// by the filters specified in the attachThread function.
   229  	regs := createSyscallRegs(&t.initRegs, unix.SYS_SCHED_YIELD)
   230  	if err := t.setRegs(&regs); err != nil {
   231  		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
   232  	}
   233  
   234  	for {
   235  		// Attempt an emulation.
   236  		if _, _, errno := unix.RawSyscall6(unix.SYS_PTRACE, unix.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
   237  			panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
   238  		}
   239  
   240  		sig := t.wait(stopped)
   241  		if sig == (syscallEvent | unix.SIGTRAP) {
   242  			// Did the seccomp errno hook already run? This would
   243  			// indicate that seccomp is first in line and we're
   244  			// less than 4.8.
   245  			if err := t.getRegs(&regs); err != nil {
   246  				panic(fmt.Sprintf("ptrace get-regs failed: %v", err))
   247  			}
   248  			if _, err := syscallReturnValue(&regs); err == nil {
   249  				// The seccomp errno mode ran first, and reset
   250  				// the error in the registers.
   251  				return false
   252  			}
   253  			// The seccomp hook did not run yet, and therefore it
   254  			// is safe to use RET_KILL mode for dispatched calls.
   255  			return true
   256  		}
   257  	}
   258  }
   259  
// arm64SyscallWorkaround does nothing in this amd64 build: the body is
// empty and both arguments are ignored.
//
// NOTE(review): presumably this exists so that architecture-independent
// code can call it unconditionally; confirm against the arm64 variant.
func (s *subprocess) arm64SyscallWorkaround(t *thread, regs *arch.Registers) {
}