github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/ring0/entry_amd64.s

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "funcdata.h"
#include "textflag.h"

// NB: Offsets are programmatically generated (see BUILD).
//
// This file is concatenated with the definitions.

// Saves a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
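//
// AX is deliberately excluded: every caller in this file uses it as the base
// pointer (reg) for the PTRACE offsets, and stashes its original value
// separately (see the ENTRY_SCRATCH0 usage in sysenter and exception below).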
#define REGISTERS_SAVE(reg, offset) \
  MOVQ R15, offset+PTRACE_R15(reg); \
  MOVQ R14, offset+PTRACE_R14(reg); \
  MOVQ R13, offset+PTRACE_R13(reg); \
  MOVQ R12, offset+PTRACE_R12(reg); \
  MOVQ BP,  offset+PTRACE_RBP(reg); \
  MOVQ BX,  offset+PTRACE_RBX(reg); \
  MOVQ CX,  offset+PTRACE_RCX(reg); \
  MOVQ DX,  offset+PTRACE_RDX(reg); \
  MOVQ R11, offset+PTRACE_R11(reg); \
  MOVQ R10, offset+PTRACE_R10(reg); \
  MOVQ R9,  offset+PTRACE_R9(reg); \
  MOVQ R8,  offset+PTRACE_R8(reg); \
  MOVQ SI,  offset+PTRACE_RSI(reg); \
  MOVQ DI,  offset+PTRACE_RDI(reg);

// Loads a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
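//
// As with REGISTERS_SAVE, AX is excluded because it serves as the base
// pointer for the PTRACE offsets; callers restore it last, by hand.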
#define REGISTERS_LOAD(reg, offset) \
  MOVQ offset+PTRACE_R15(reg), R15; \
  MOVQ offset+PTRACE_R14(reg), R14; \
  MOVQ offset+PTRACE_R13(reg), R13; \
  MOVQ offset+PTRACE_R12(reg), R12; \
  MOVQ offset+PTRACE_RBP(reg), BP; \
  MOVQ offset+PTRACE_RBX(reg), BX; \
  MOVQ offset+PTRACE_RCX(reg), CX; \
  MOVQ offset+PTRACE_RDX(reg), DX; \
  MOVQ offset+PTRACE_R11(reg), R11; \
  MOVQ offset+PTRACE_R10(reg), R10; \
  MOVQ offset+PTRACE_R9(reg),  R9; \
  MOVQ offset+PTRACE_R8(reg),  R8; \
  MOVQ offset+PTRACE_RSI(reg), SI; \
  MOVQ offset+PTRACE_RDI(reg), DI;

// WRITE_CR3() writes the given CR3 value.
//
// The code corresponds to:
//
//     mov %rax, %cr3
//
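// The instruction is hand-encoded as raw bytes: 0F 22 is MOV to a control
// register, and ModRM byte 0xd8 selects CR3 as the destination and RAX as
// the source. Note that writing CR3 also flushes all non-global TLB entries
// as a side effect, which is exactly what the address-space switches below
// rely on.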
#define WRITE_CR3() \
	BYTE $0x0f; BYTE $0x22; BYTE $0xd8;

// SWAP_GS swaps the kernel GS (CPU).
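//
// The bytes 0F 01 F8 encode SWAPGS, which exchanges the GS base with the
// IA32_KERNEL_GS_BASE MSR; this is how the per-CPU entry area (the ENTRY_*
// offsets used below) is found on kernel entry and hidden again on exit.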
#define SWAP_GS() \
	BYTE $0x0F; BYTE $0x01; BYTE $0xf8;

// IRET returns from an interrupt frame.
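//
// The bytes 48 CF encode IRETQ (REX.W + IRET): it pops IP, CS, FLAGS, SP and
// SS from the stack frame in a single instruction.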
#define IRET() \
	BYTE $0x48; BYTE $0xcf;

// SYSRET64 executes the sysret instruction.
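//
// The bytes 48 0F 07 encode SYSRETQ (REX.W + SYSRET): it returns to 64-bit
// user mode, loading RIP from RCX and RFLAGS from R11, which is why those
// two registers are set up explicitly in sysret below.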
#define SYSRET64() \
	BYTE $0x48; BYTE $0x0f; BYTE $0x07;

// LOAD_KERNEL_STACK loads the kernel stack.
#define LOAD_KERNEL_STACK(entry) \
	MOVQ ENTRY_STACK_TOP(entry), SP;

// See kernel.go.
TEXT ·Halt(SB),NOSPLIT,$0
	HLT
	RET

// See entry_amd64.go.
TEXT ·swapgs(SB),NOSPLIT,$0
	SWAP_GS()
	RET

// jumpToKernel changes execution to the kernel address space.
//
// This works by changing the return value to the kernel version.
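//
// A sketch of the effect, in Go-like pseudocode (load64/store64 are
// hypothetical helpers; KernelStartAddress comes from the generated
// definitions, and the text is assumed to be mapped at both the original
// return address and its kernel-half alias):
//
//	ret := load64(SP)         // Caller's return address.
//	ret |= KernelStartAddress // Alias it into the kernel half.
//	store64(SP, ret)          // The subsequent RET lands in kernel space.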
TEXT ·jumpToKernel(SB),NOSPLIT,$0
	MOVQ 0(SP), AX
	ORQ ·KernelStartAddress(SB), AX // Future return value.
	MOVQ AX, 0(SP)
	RET

// See entry_amd64.go.
TEXT ·sysret(SB),NOSPLIT,$0-24
	CALL ·jumpToKernel(SB)
	// Save the original state and stack. sysenter() or exception() from
	// the application (guest ring 3) will switch to this stack, set the
	// return value (vector: 32(SP)) and then do RET, which will also
	// automatically return to the lower half.
	MOVQ cpu+0(FP), BX
	MOVQ regs+8(FP), AX
	MOVQ userCR3+16(FP), CX
	MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
	MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
	MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)

	// Save user SP, AX and userCR3 on the kernel stack.
	MOVQ CPU_ENTRY(BX), BX
	LOAD_KERNEL_STACK(BX)
	PUSHQ PTRACE_RSP(AX)
	PUSHQ PTRACE_RAX(AX)
	PUSHQ CX
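	// The kernel stack now holds, and the pops below consume:
	//
	//	userCR3 (sp+0)
	//	user AX (sp+8)
	//	user SP (sp+16)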

	// Restore user register state.
	REGISTERS_LOAD(AX, 0)
	MOVQ PTRACE_RIP(AX), CX    // Needed for SYSRET.
	MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.

	// Restore userCR3, AX and SP.
	POPQ AX                             // Get userCR3.
	WRITE_CR3()                         // Switch to userCR3.
	POPQ AX                             // Restore AX.
	POPQ SP                             // Restore SP.
	SYSRET64()

// See entry_amd64.go.
TEXT ·iret(SB),NOSPLIT,$0-24
	CALL ·jumpToKernel(SB)
	// Save the original state and stack. sysenter() or exception() from
	// the application (guest ring 3) will switch to this stack, set the
	// return value (vector: 32(SP)) and then do RET, which will also
	// automatically return to the lower half.
	MOVQ cpu+0(FP), BX
	MOVQ regs+8(FP), AX
	MOVQ userCR3+16(FP), CX
	MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
	MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
	MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)

	// Build an IRET frame & restore state.
	MOVQ CPU_ENTRY(BX), BX
	LOAD_KERNEL_STACK(BX)
	PUSHQ PTRACE_SS(AX)
	PUSHQ PTRACE_RSP(AX)
	PUSHQ PTRACE_FLAGS(AX)
	PUSHQ PTRACE_CS(AX)
	PUSHQ PTRACE_RIP(AX)
	PUSHQ PTRACE_RAX(AX)                // Save AX on kernel stack.
	PUSHQ CX                            // Save userCR3 on kernel stack.
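	// The kernel stack now holds the IRETQ frame, topped by two scratch
	// slots that the pops below consume:
	//
	//	SS      (sp+48)
	//	SP      (sp+40)
	//	FLAGS   (sp+32)
	//	CS      (sp+24)
	//	IP      (sp+16)
	//	user AX (sp+8)
	//	userCR3 (sp+0)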
	REGISTERS_LOAD(AX, 0)               // Restore most registers.
	POPQ AX                             // Get userCR3.
	WRITE_CR3()                         // Switch to userCR3.
	POPQ AX                             // Restore AX.
	IRET()

// See entry_amd64.go.
TEXT ·resume(SB),NOSPLIT,$0
	// See iret, above.
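	//
	// resume rebuilds an IRETQ frame from the saved kernel register state
	// and returns through it. Unlike iret, there is no CR3 write here:
	// execution resumes in the kernel itself, on the kernel page tables.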
	MOVQ ENTRY_CPU_SELF(GS), AX                 // Load vCPU.
	PUSHQ CPU_REGISTERS+PTRACE_SS(AX)
	PUSHQ CPU_REGISTERS+PTRACE_RSP(AX)
	PUSHQ CPU_REGISTERS+PTRACE_FLAGS(AX)
	PUSHQ CPU_REGISTERS+PTRACE_CS(AX)
	PUSHQ CPU_REGISTERS+PTRACE_RIP(AX)
	REGISTERS_LOAD(AX, CPU_REGISTERS)
	MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX
	IRET()

// See entry_amd64.go.
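//
// Start is the initial entry point: AX carries the CPU pointer on entry. A
// minimal fake frame (a zero saved frame pointer plus one stack-passed
// argument) is built so the Go hook ·start sees a conventional call frame.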
TEXT ·Start(SB),NOSPLIT,$0
	PUSHQ $0x0            // Previous frame pointer.
	MOVQ SP, BP           // Set frame pointer.
	PUSHQ AX              // First argument (CPU).
	CALL ·start(SB)       // Call Go hook.
	JMP ·resume(SB)       // Restore registers and resume.

// See entry_amd64.go.
TEXT ·sysenter(SB),NOSPLIT,$0
	// _RFLAGS_IOPL0 is always set in user mode and never set in kernel
	// mode. See the comment on UserFlagsSet for more details.
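	//
	// On SYSCALL entry the hardware has already placed the user RIP in CX
	// and the user RFLAGS in R11, which is why R11 is tested here and both
	// are saved below rather than read from an interrupt frame.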
	TESTL $_RFLAGS_IOPL0, R11
	JZ kernel
user:
	SWAP_GS()
	MOVQ AX, ENTRY_SCRATCH0(GS)            // Save user AX on scratch.
	MOVQ ENTRY_KERNEL_CR3(GS), AX          // Get kernel cr3 on AX.
	WRITE_CR3()                            // Switch to kernel cr3.

	MOVQ ENTRY_CPU_SELF(GS), AX            // Load vCPU.
	MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX  // Get user regs.
	REGISTERS_SAVE(AX, 0)                  // Save all except IP, FLAGS, SP, AX.
	MOVQ CX,  PTRACE_RIP(AX)
	MOVQ R11, PTRACE_FLAGS(AX)
	MOVQ SP,  PTRACE_RSP(AX)
	MOVQ ENTRY_SCRATCH0(GS), CX            // Load saved user AX value.
	MOVQ CX,  PTRACE_RAX(AX)               // Save user AX.
	MOVQ CX,  PTRACE_ORIGRAX(AX)           // Save as original RAX too.

	MOVQ ENTRY_CPU_SELF(GS), AX            // Load vCPU.
	MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP  // Restore the saved kernel SP.
	MOVQ $0, CPU_ERROR_CODE(AX)            // Clear error code.
	MOVQ $1, CPU_ERROR_TYPE(AX)            // Set error type to user.

	// Return to the kernel, where the frame is:
	//
	//	vector      (sp+32)
	//	userCR3     (sp+24)
	//	regs        (sp+16)
	//	cpu         (sp+8)
	//	vcpu.Switch (sp+0)
	//
	MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
	MOVQ $Syscall, 32(SP)                 // Output vector.
	RET

kernel:
	// We can't restore the original stack, but we can access the registers
	// in the CPU state directly. No need for temporary juggling.
	MOVQ AX,  ENTRY_SCRATCH0(GS)
	MOVQ ENTRY_CPU_SELF(GS), AX                 // Load vCPU.
	REGISTERS_SAVE(AX, CPU_REGISTERS)
	MOVQ CX,  CPU_REGISTERS+PTRACE_RIP(AX)
	MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(AX)
	MOVQ SP,  CPU_REGISTERS+PTRACE_RSP(AX)
	MOVQ ENTRY_SCRATCH0(GS), BX
	MOVQ BX,  CPU_REGISTERS+PTRACE_ORIGRAX(AX)
	MOVQ BX,  CPU_REGISTERS+PTRACE_RAX(AX)
	MOVQ $0,  CPU_ERROR_CODE(AX)                // Clear error code.
	MOVQ $0,  CPU_ERROR_TYPE(AX)                // Set error type to kernel.

	// Call the syscall trampoline.
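	//
	// The vCPU pointer is passed as a single stack argument, matching what
	// the Go-side trampoline in entry_amd64.go expects.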
	LOAD_KERNEL_STACK(GS)
	PUSHQ AX                // First argument (vCPU).
	CALL ·kernelSyscall(SB) // Call the trampoline.
	POPQ AX                 // Pop vCPU.
	JMP ·resume(SB)

// exception is a generic exception handler.
//
// There are two cases handled:
//
// 1) An exception in kernel mode: this results in saving the state at the time
// of the exception and calling the defined hook.
//
// 2) An exception in guest user mode: the original kernel frame is restored,
// and the vector & error codes are pushed as return values.
//
// See below for the stubs that call exception.
TEXT ·exception(SB),NOSPLIT,$0
	// Determine whether the exception occurred in kernel mode or user
	// mode, based on the flags. We expect the following stack:
	//
	//	SS          (sp+48)
	//	SP          (sp+40)
	//	FLAGS       (sp+32)
	//	CS          (sp+24)
	//	IP          (sp+16)
	//	ERROR_CODE  (sp+8)
	//	VECTOR      (sp+0)
	//
	TESTL $_RFLAGS_IOPL0, 32(SP)
	JZ kernel

user:
	SWAP_GS()
	ADDQ $-8, SP                            // Adjust for flags.
	MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ).
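	// The two lines above open a temporary stack slot, store the desired
	// kernel flags there, and pop them into RFLAGS via a raw POPFQ (byte
	// 0x9d); the net stack adjustment is zero, so the frame offsets used
	// below are unchanged.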
	PUSHQ AX                                // Save user AX on stack.
	MOVQ ENTRY_KERNEL_CR3(GS), AX           // Get kernel cr3 on AX.
	WRITE_CR3()                             // Switch to kernel cr3.

	MOVQ ENTRY_CPU_SELF(GS), AX             // Load vCPU.
	MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX   // Get user regs.
	REGISTERS_SAVE(AX, 0)                   // Save all except IP, FLAGS, SP, AX.
	POPQ BX                                 // Restore original AX.
	MOVQ BX, PTRACE_RAX(AX)                 // Save it.
	MOVQ BX, PTRACE_ORIGRAX(AX)
	MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
	MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
	MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
	MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
	MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)

	// Copy out and return.
	MOVQ ENTRY_CPU_SELF(GS), AX           // Load vCPU.
	MOVQ 0(SP), BX                        // Load vector.
	MOVQ 8(SP), CX                        // Load error code.
	MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Original stack (kernel version).
	MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
	MOVQ CX, CPU_ERROR_CODE(AX)           // Set error code.
	MOVQ $1, CPU_ERROR_TYPE(AX)           // Set error type to user.
	MOVQ BX, 32(SP)                       // Output vector.
	RET

kernel:
	// As per above, we can save directly.
	PUSHQ AX
	MOVQ ENTRY_CPU_SELF(GS), AX                        // Load vCPU.
	REGISTERS_SAVE(AX, CPU_REGISTERS)
	POPQ BX
	MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
	MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
	MOVQ 16(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RIP(AX)
	MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(AX)
	MOVQ 40(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RSP(AX)

	// Set the error code and adjust the stack.
	MOVQ 8(SP), BX              // Load the error code.
	MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
	MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
	MOVQ 0(SP), BX              // BX contains the vector.

	// Call the exception trampoline.
	LOAD_KERNEL_STACK(GS)
	PUSHQ BX                  // Second argument (vector).
	PUSHQ AX                  // First argument (vCPU).
	CALL ·kernelException(SB) // Call the trampoline.
	POPQ BX                   // Pop vector.
	POPQ AX                   // Pop vCPU.
	JMP ·resume(SB)

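// The stubs below push a uniform frame for ·exception. The hardware pushes
// an error code only for some vectors (double fault, the TSS/segment/stack
// faults, general protection, page fault, alignment check and the security
// exception), so EXCEPTION_WITHOUT_ERROR pushes a dummy zero to keep VECTOR
// at 0(SP) and ERROR_CODE at 8(SP) in every case.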
#define EXCEPTION_WITH_ERROR(value, symbol) \
TEXT symbol,NOSPLIT,$0; \
	PUSHQ $value; \
	JMP ·exception(SB);

#define EXCEPTION_WITHOUT_ERROR(value, symbol) \
TEXT symbol,NOSPLIT,$0; \
	PUSHQ $0x0; \
	PUSHQ $value; \
	JMP ·exception(SB);

EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB))
EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB))
EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB))
EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB))
EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB))
EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB))
EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB))
EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB))
EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB))
EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB))
EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB))
EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB))
EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB))
EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB))
EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB))
EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB))
EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB))
EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB))
EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB))
EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB))
EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB))
EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB))