github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/ring0/entry_amd64.s

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "funcdata.h"
#include "textflag.h"

// CPU offsets.
#define CPU_REGISTERS    64  // +checkoffset . CPU.registers
#define CPU_FPU_STATE    280 // +checkoffset . CPU.floatingPointState
#define CPU_ARCH_STATE   16  // +checkoffset . CPU.CPUArchState
#define CPU_ERROR_CODE   CPU_ARCH_STATE+0  // +checkoffset . CPUArchState.errorCode
#define CPU_ERROR_TYPE   CPU_ARCH_STATE+8  // +checkoffset . CPUArchState.errorType
#define CPU_VECTOR       CPU_ARCH_STATE+16 // +checkoffset . CPUArchState.vector
#define CPU_FAULT_ADDR   CPU_ARCH_STATE+24 // +checkoffset . CPUArchState.faultAddr
#define CPU_ENTRY        CPU_ARCH_STATE+32 // +checkoffset . CPUArchState.kernelEntry
#define CPU_HAS_XSAVE    CPU_ARCH_STATE+40 // +checkoffset . CPUArchState.hasXSAVE
#define CPU_HAS_XSAVEOPT CPU_ARCH_STATE+41 // +checkoffset . CPUArchState.hasXSAVEOPT

#define ENTRY_SCRATCH0   256 // +checkoffset . kernelEntry.scratch0
#define ENTRY_STACK_TOP  264 // +checkoffset . kernelEntry.stackTop
#define ENTRY_CPU_SELF   272 // +checkoffset . kernelEntry.cpuSelf
#define ENTRY_KERNEL_CR3 280 // +checkoffset . kernelEntry.kernelCR3

// Bits.
#define _RFLAGS_IF    512  // +checkconst . _RFLAGS_IF
#define _RFLAGS_IOPL0 4096 // +checkconst . _RFLAGS_IOPL0
#define _KERNEL_FLAGS 2    // +checkconst . KernelFlagsSet

// Vectors.
#define DivideByZero               0   // +checkconst . DivideByZero
#define Debug                      1   // +checkconst . Debug
#define NMI                        2   // +checkconst . NMI
#define Breakpoint                 3   // +checkconst . Breakpoint
#define Overflow                   4   // +checkconst . Overflow
#define BoundRangeExceeded         5   // +checkconst . BoundRangeExceeded
#define InvalidOpcode              6   // +checkconst . InvalidOpcode
#define DeviceNotAvailable         7   // +checkconst . DeviceNotAvailable
#define DoubleFault                8   // +checkconst . DoubleFault
#define CoprocessorSegmentOverrun  9   // +checkconst . CoprocessorSegmentOverrun
#define InvalidTSS                 10  // +checkconst . InvalidTSS
#define SegmentNotPresent          11  // +checkconst . SegmentNotPresent
#define StackSegmentFault          12  // +checkconst . StackSegmentFault
#define GeneralProtectionFault     13  // +checkconst . GeneralProtectionFault
#define PageFault                  14  // +checkconst . PageFault
#define X87FloatingPointException  16  // +checkconst . X87FloatingPointException
#define AlignmentCheck             17  // +checkconst . AlignmentCheck
#define MachineCheck               18  // +checkconst . MachineCheck
#define SIMDFloatingPointException 19  // +checkconst . SIMDFloatingPointException
#define VirtualizationException    20  // +checkconst . VirtualizationException
#define SecurityException          30  // +checkconst . SecurityException
#define SyscallInt80               128 // +checkconst . SyscallInt80
#define Syscall                    256 // +checkconst . Syscall
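
// N.B. The literal values above are checked against the corresponding Go
// definitions by the +checkconst/+checkoffset annotations. Note also that
// Syscall (256) is not a hardware vector: the IDT only covers vectors
// 0-255, so 256 serves as a software-defined value used to report a
// syscall entry in the switch return frame (see sysenter below).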

#define PTRACE_R15     0   // +checkoffset linux PtraceRegs.R15
#define PTRACE_R14     8   // +checkoffset linux PtraceRegs.R14
#define PTRACE_R13     16  // +checkoffset linux PtraceRegs.R13
#define PTRACE_R12     24  // +checkoffset linux PtraceRegs.R12
#define PTRACE_RBP     32  // +checkoffset linux PtraceRegs.Rbp
#define PTRACE_RBX     40  // +checkoffset linux PtraceRegs.Rbx
#define PTRACE_R11     48  // +checkoffset linux PtraceRegs.R11
#define PTRACE_R10     56  // +checkoffset linux PtraceRegs.R10
#define PTRACE_R9      64  // +checkoffset linux PtraceRegs.R9
#define PTRACE_R8      72  // +checkoffset linux PtraceRegs.R8
#define PTRACE_RAX     80  // +checkoffset linux PtraceRegs.Rax
#define PTRACE_RCX     88  // +checkoffset linux PtraceRegs.Rcx
#define PTRACE_RDX     96  // +checkoffset linux PtraceRegs.Rdx
#define PTRACE_RSI     104 // +checkoffset linux PtraceRegs.Rsi
#define PTRACE_RDI     112 // +checkoffset linux PtraceRegs.Rdi
#define PTRACE_ORIGRAX 120 // +checkoffset linux PtraceRegs.Orig_rax
#define PTRACE_RIP     128 // +checkoffset linux PtraceRegs.Rip
#define PTRACE_CS      136 // +checkoffset linux PtraceRegs.Cs
#define PTRACE_FLAGS   144 // +checkoffset linux PtraceRegs.Eflags
#define PTRACE_RSP     152 // +checkoffset linux PtraceRegs.Rsp
#define PTRACE_SS      160 // +checkoffset linux PtraceRegs.Ss
#define PTRACE_FS_BASE 168 // +checkoffset linux PtraceRegs.Fs_base
#define PTRACE_GS_BASE 176 // +checkoffset linux PtraceRegs.Gs_base

// Saves a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
#define REGISTERS_SAVE(reg, offset) \
        MOVQ R15, offset+PTRACE_R15(reg); \
        MOVQ R14, offset+PTRACE_R14(reg); \
        MOVQ R13, offset+PTRACE_R13(reg); \
        MOVQ R12, offset+PTRACE_R12(reg); \
        MOVQ BP, offset+PTRACE_RBP(reg); \
        MOVQ BX, offset+PTRACE_RBX(reg); \
        MOVQ CX, offset+PTRACE_RCX(reg); \
        MOVQ DX, offset+PTRACE_RDX(reg); \
        MOVQ R11, offset+PTRACE_R11(reg); \
        MOVQ R10, offset+PTRACE_R10(reg); \
        MOVQ R9, offset+PTRACE_R9(reg); \
        MOVQ R8, offset+PTRACE_R8(reg); \
        MOVQ SI, offset+PTRACE_RSI(reg); \
        MOVQ DI, offset+PTRACE_RDI(reg);

// Loads a register set.
//
// This is a macro because it may need to be executed in contexts where a
// stack is not available for calls.
//
// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
#define REGISTERS_LOAD(reg, offset) \
        MOVQ offset+PTRACE_R15(reg), R15; \
        MOVQ offset+PTRACE_R14(reg), R14; \
        MOVQ offset+PTRACE_R13(reg), R13; \
        MOVQ offset+PTRACE_R12(reg), R12; \
        MOVQ offset+PTRACE_RBP(reg), BP; \
        MOVQ offset+PTRACE_RBX(reg), BX; \
        MOVQ offset+PTRACE_RCX(reg), CX; \
        MOVQ offset+PTRACE_RDX(reg), DX; \
        MOVQ offset+PTRACE_R11(reg), R11; \
        MOVQ offset+PTRACE_R10(reg), R10; \
        MOVQ offset+PTRACE_R9(reg), R9; \
        MOVQ offset+PTRACE_R8(reg), R8; \
        MOVQ offset+PTRACE_RSI(reg), SI; \
        MOVQ offset+PTRACE_RDI(reg), DI;

// WRITE_CR3() writes the given CR3 value.
//
// The code corresponds to:
//
//     mov %rax, %cr3
//
#define WRITE_CR3() \
        BYTE $0x0f; BYTE $0x22; BYTE $0xd8;

// SWAP_GS swaps the kernel GS (CPU).
#define SWAP_GS() \
        BYTE $0x0F; BYTE $0x01; BYTE $0xf8;
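
// N.B. Several privileged instructions in this file (WRITE_CR3, SWAP_GS,
// IRET, SYSRET64 and the XSAVE/XRSTOR variants below) are emitted as raw
// bytes, presumably because the Go assembler does not accept these
// mnemonics: 0f 22 d8 loads CR3 from RAX, and 0f 01 f8 is SWAPGS.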

// IRET returns from an interrupt frame.
#define IRET() \
        BYTE $0x48; BYTE $0xcf;

// SYSRET64 executes the sysret instruction.
#define SYSRET64() \
        BYTE $0x48; BYTE $0x0f; BYTE $0x07;

// LOAD_KERNEL_STACK loads the kernel stack.
#define LOAD_KERNEL_STACK(entry) \
        MOVQ ENTRY_STACK_TOP(entry), SP;

// ADDR_OF_FUNC defines a function named 'name' that returns the address of
// 'symbol'.
#define ADDR_OF_FUNC(name, symbol) \
TEXT name,$0-8; \
        MOVQ $symbol, AX; \
        MOVQ AX, ret+0(FP); \
        RET

// See kernel.go.
TEXT ·Halt(SB),NOSPLIT|NOFRAME,$0
        HLT
        RET

// See kernel_amd64.go.
TEXT ·HaltAndWriteFSBase(SB),NOSPLIT,$8-8
        HLT

        // Restore FS_BASE.
        MOVQ regs+0(FP), AX
        MOVQ PTRACE_FS_BASE(AX), AX

        PUSHQ AX // First argument (FS_BASE)
        CALL ·writeFS(SB)
        POPQ AX

        RET

// jumpToKernel changes execution to the kernel address space.
//
// This works by changing the return value to the kernel version.
TEXT ·jumpToKernel(SB),NOSPLIT|NOFRAME,$0
        MOVQ 0(SP), AX
        ORQ ·KernelStartAddress(SB), AX // Future return value.
        MOVQ AX, 0(SP)
        RET

// jumpToUser changes execution to the user address space.
//
// This works by changing the return value to the user version.
TEXT ·jumpToUser(SB),NOSPLIT|NOFRAME,$0
        // N.B. we can't access KernelStartAddress from the upper half (data
        // pages not available), so just naively clear all the upper bits.
        // We are assuming a 47-bit virtual address space.
        MOVQ $0x00007fffffffffff, AX
        MOVQ 0(SP), BX
        ANDQ BX, AX // Future return value.
        MOVQ AX, 0(SP)
        RET

// See kernel_amd64.go.
//
// The 16-byte frame size is for the saved values of MXCSR and the x87 control
// word.
TEXT ·doSwitchToUser(SB),NOSPLIT,$16-48
        // We are passed pointers to heap objects, but do not store them in our
        // local frame.
        NO_LOCAL_POINTERS

        // MXCSR and the x87 control word are the only floating point state
        // that is callee-save and thus we must save.
        STMXCSR mxcsr-0(SP)
        FSTCW cw-8(SP)

        // Restore application floating point state.
        MOVQ cpu+0(FP), SI
        MOVQ fpState+16(FP), DI
        MOVB ·hasXSAVE(SB), BX
        TESTB BX, BX
        JZ no_xrstor
        // Use xrstor to restore all available fp state. For now, we restore
        // everything unconditionally by setting the implicit operand edx:eax
        // (the "requested feature bitmap") to all 1's.
        MOVL $0xffffffff, AX
        MOVL $0xffffffff, DX
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
        JMP fprestore_done
no_xrstor:
        // Fall back to fxrstor if xsave is not available.
        FXRSTOR64 0(DI)
fprestore_done:
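
        // N.B. The state components XRSTOR actually restores are the
        // intersection of the EDX:EAX mask above and the features enabled
        // in XCR0, so the all-ones mask restores exactly the enabled set.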

        // Set application GS.
        MOVQ regs+8(FP), R8
        SWAP_GS()
        MOVQ PTRACE_GS_BASE(R8), AX
        PUSHQ AX
        CALL ·writeGS(SB)
        POPQ AX

        // Call sysret() or iret().
        MOVQ userCR3+24(FP), CX
        MOVQ needIRET+32(FP), R9
        ADDQ $-32, SP
        MOVQ SI, 0(SP)  // cpu
        MOVQ R8, 8(SP)  // regs
        MOVQ CX, 16(SP) // userCR3
        TESTQ R9, R9
        JNZ do_iret
        CALL ·sysret(SB)
        JMP done_sysret_or_iret
do_iret:
        CALL ·iret(SB)
done_sysret_or_iret:
        MOVQ 24(SP), AX // vector
        ADDQ $32, SP
        MOVQ AX, ret+40(FP)

        // Save application floating point state.
        MOVQ fpState+16(FP), DI
        MOVB ·hasXSAVE(SB), BX
        MOVB ·hasXSAVEOPT(SB), CX
        TESTB BX, BX
        JZ no_xsave
        // Use xsave/xsaveopt to save all extended state.
        // We save everything unconditionally by setting RFBM to all 1's.
        MOVL $0xffffffff, AX
        MOVL $0xffffffff, DX
        TESTB CX, CX
        JZ no_xsaveopt
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
        JMP fpsave_done
no_xsaveopt:
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
        JMP fpsave_done
no_xsave:
        FXSAVE64 0(DI)
fpsave_done:

        // Restore MXCSR and the x87 control word after one of the two floating
        // point save cases above, to ensure the application versions are saved
        // before being clobbered here.
        LDMXCSR mxcsr-0(SP)

        // FLDCW is a "waiting" x87 instruction, meaning it checks for pending
        // unmasked exceptions before executing. Thus if userspace has unmasked
        // an exception and has one pending, it can be raised by FLDCW even
        // though the new control word will mask exceptions. To prevent this,
        // we must first clear pending exceptions (which will be restored by
        // XRSTOR, et al).
        BYTE $0xDB; BYTE $0xE2; // FNCLEX
        FLDCW cw-8(SP)

        RET

// See entry_amd64.go.
TEXT ·sysret(SB),NOSPLIT|NOFRAME,$0-32
        // Set application FS. We can't do this in Go because Go code needs FS.
        MOVQ regs+8(FP), AX
        MOVQ PTRACE_FS_BASE(AX), AX

        PUSHQ AX
        CALL ·writeFS(SB)
        POPQ AX

        CALL ·jumpToKernel(SB)
        // Save original state and stack. sysenter() or exception()
        // from APP(gr3) will switch to this stack, set the return
        // value (vector: 32(SP)) and then do RET, which will also
        // automatically return to the lower half.
        MOVQ cpu+0(FP), BX
        MOVQ regs+8(FP), AX
        MOVQ userCR3+16(FP), CX
        MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
        MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
        MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)

        // Save SP, AX and userCR3 on the kernel stack.
        MOVQ CPU_ENTRY(BX), BX
        LOAD_KERNEL_STACK(BX)
        PUSHQ PTRACE_RSP(AX)
        PUSHQ PTRACE_RAX(AX)
        PUSHQ CX

        // Restore user register state.
        REGISTERS_LOAD(AX, 0)
        MOVQ PTRACE_RIP(AX), CX    // Needed for SYSRET.
        MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.

        // Restore userCR3, AX, SP.
        POPQ AX     // Get userCR3.
        WRITE_CR3() // Switch to userCR3.
        POPQ AX     // Restore AX.
        POPQ SP     // Restore SP.
        SYSRET64()
        // sysenter or exception will write our return value and return to our
        // caller.
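
// N.B. SYSRET64 loads RIP from RCX and RFLAGS from R11, which is why
// sysret above stages the saved user RIP and FLAGS in exactly those
// registers before the final SYSRET64.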

// See entry_amd64.go.
TEXT ·iret(SB),NOSPLIT|NOFRAME,$0-32
        // Set application FS. We can't do this in Go because Go code needs FS.
        MOVQ regs+8(FP), AX
        MOVQ PTRACE_FS_BASE(AX), AX

        PUSHQ AX // First argument (FS_BASE)
        CALL ·writeFS(SB)
        POPQ AX

        CALL ·jumpToKernel(SB)
        // Save original state and stack. sysenter() or exception()
        // from APP(gr3) will switch to this stack, set the return
        // value (vector: 32(SP)) and then do RET, which will also
        // automatically return to the lower half.
        MOVQ cpu+0(FP), BX
        MOVQ regs+8(FP), AX
        MOVQ userCR3+16(FP), CX
        MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
        MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
        MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)

        // Build an IRET frame & restore state.
        MOVQ CPU_ENTRY(BX), BX
        LOAD_KERNEL_STACK(BX)
        PUSHQ PTRACE_SS(AX)
        PUSHQ PTRACE_RSP(AX)
        PUSHQ PTRACE_FLAGS(AX)
        PUSHQ PTRACE_CS(AX)
        PUSHQ PTRACE_RIP(AX)
        PUSHQ PTRACE_RAX(AX)  // Save AX on kernel stack.
        PUSHQ CX              // Save userCR3 on kernel stack.
        REGISTERS_LOAD(AX, 0) // Restore most registers.
        POPQ AX               // Get userCR3.
        WRITE_CR3()           // Switch to userCR3.
        POPQ AX               // Restore AX.
        IRET()
        // sysenter or exception will write our return value and return to our
        // caller.

// See entry_amd64.go.
TEXT ·resume(SB),NOSPLIT|NOFRAME,$0
        // See iret, above.
        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
        PUSHQ CPU_REGISTERS+PTRACE_SS(AX)
        PUSHQ CPU_REGISTERS+PTRACE_RSP(AX)
        PUSHQ CPU_REGISTERS+PTRACE_FLAGS(AX)
        PUSHQ CPU_REGISTERS+PTRACE_CS(AX)
        PUSHQ CPU_REGISTERS+PTRACE_RIP(AX)
        REGISTERS_LOAD(AX, CPU_REGISTERS)
        MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX
        IRET()

// See entry_amd64.go.
TEXT ·start(SB),NOSPLIT|NOFRAME,$0
        // N.B. This is the vCPU entrypoint. It is not called from Go code and
        // thus pushes and pops values on the stack until calling into Go
        // (startGo), because this is not a typical Go assembly frame.
        PUSHQ $0x0  // Previous frame pointer.
        MOVQ SP, BP // Set frame pointer.
        PUSHQ AX    // Save CPU.

        // Set up environment required by Go before calling startGo: Go needs
        // FS_BASE and floating point initialized.
        MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
        PUSHQ BX // First argument (FS_BASE)
        CALL ·writeFS(SB)
        POPQ BX

        // First argument (CPU) already at bottom of stack.
        CALL ·startGo(SB) // Call Go hook.
        JMP ·resume(SB)   // Restore to registers.

ADDR_OF_FUNC(·AddrOfStart(SB), ·start(SB));
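
// N.B. sysenter below is entered via the SYSCALL instruction (this address
// is presumably installed as the syscall entry point via the LSTAR MSR):
// the processor has already saved the user RIP in RCX and the user RFLAGS
// in R11, which is why the IOPL test and the register saves below take the
// user IP and FLAGS from CX and R11.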

// See entry_amd64.go.
TEXT ·sysenter(SB),NOSPLIT|NOFRAME,$0
        // _RFLAGS_IOPL0 is always set in user mode and never set in kernel
        // mode. See the comment on UserFlagsSet for more details.
        TESTL $_RFLAGS_IOPL0, R11
        JZ kernel
user:
        SWAP_GS()
        MOVQ AX, ENTRY_SCRATCH0(GS)   // Save user AX on scratch.
        MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
        WRITE_CR3()                   // Switch to kernel cr3.

        MOVQ ENTRY_CPU_SELF(GS), AX           // Load vCPU.
        MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
        REGISTERS_SAVE(AX, 0)                 // Save all except IP, FLAGS, SP, AX.
        MOVQ CX, PTRACE_RIP(AX)
        MOVQ R11, PTRACE_FLAGS(AX)
        MOVQ SP, PTRACE_RSP(AX)
        MOVQ ENTRY_SCRATCH0(GS), CX // Load saved user AX value.
        MOVQ CX, PTRACE_RAX(AX)     // Save user AX.
        MOVQ CX, PTRACE_ORIGRAX(AX)

        MOVQ ENTRY_CPU_SELF(GS), AX           // Load vCPU.
        MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Get stacks.
        MOVQ $0, CPU_ERROR_CODE(AX)           // Clear error code.
        MOVQ $1, CPU_ERROR_TYPE(AX)           // Set error type to user.

        CALL ·jumpToUser(SB)

        // Restore kernel FS_BASE.
        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
        MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX

        PUSHQ BX // First argument (FS_BASE)
        CALL ·writeFS(SB)
        POPQ BX

        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.

        // Return to the kernel, where the frame is:
        //
        //     vector      (sp+32)
        //     userCR3     (sp+24)
        //     regs        (sp+16)
        //     cpu         (sp+8)
        //     vcpu.Switch (sp+0)
        //
        MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
        MOVQ $Syscall, 32(SP)                 // Output vector.
        RET

kernel:
        // We can't restore the original stack, but we can access the registers
        // in the CPU state directly. No need for temporary juggling.
        MOVQ AX, ENTRY_SCRATCH0(GS)
        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
        REGISTERS_SAVE(AX, CPU_REGISTERS)
        MOVQ CX, CPU_REGISTERS+PTRACE_RIP(AX)
        MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(AX)
        MOVQ SP, CPU_REGISTERS+PTRACE_RSP(AX)
        MOVQ ENTRY_SCRATCH0(GS), BX
        MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
        MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
        MOVQ $0, CPU_ERROR_CODE(AX)              // Clear error code.
        MOVQ $0, CPU_ERROR_TYPE(AX)              // Set error type to kernel.
        MOVQ $0xffffffffffffffff, CPU_VECTOR(AX) // Set vector to an invalid (kernel) value.

        // Save floating point state. CPU.floatingPointState is a slice, so the
        // first word of CPU.floatingPointState is a pointer to the destination
        // array.
        MOVQ CPU_FPU_STATE(AX), DI
        MOVB CPU_HAS_XSAVE(AX), BX
        MOVB CPU_HAS_XSAVEOPT(AX), CX
        TESTB BX, BX
        JZ no_xsave
        // Use xsave/xsaveopt to save all extended state.
        // We save everything unconditionally by setting RFBM to all 1's.
        MOVL $0xffffffff, AX
        MOVL $0xffffffff, DX
        TESTB CX, CX
        JZ no_xsaveopt
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
        JMP fpsave_done
no_xsaveopt:
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
        JMP fpsave_done
no_xsave:
        FXSAVE64 0(DI)
fpsave_done:

        // Call the syscall trampoline.
        LOAD_KERNEL_STACK(GS)
        MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
        PUSHQ AX                    // First argument (vCPU).
        CALL ·kernelSyscall(SB)     // Call the trampoline.
        POPQ AX                     // Pop vCPU.

        // We only trigger a bluepill entry in the bluepill function, and can
        // therefore be guaranteed that there is no floating point state to be
        // loaded on resuming from halt.
        JMP ·resume(SB)

ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB));
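
// N.B. The processor pushes a hardware error code only for some vectors
// (of those handled here: DoubleFault, InvalidTSS, SegmentNotPresent,
// StackSegmentFault, GeneralProtectionFault, PageFault, AlignmentCheck and
// SecurityException). The stubs at the bottom of this file push a zero
// placeholder for the remaining vectors so that exception always sees the
// uniform frame described below.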

// exception is a generic exception handler.
//
// There are two cases handled:
//
// 1) An exception in kernel mode: this results in saving the state at the time
// of the exception and calling the defined hook.
//
// 2) An exception in guest mode: the original kernel frame is restored, and
// the vector & error codes are pushed as return values.
//
// See below for the stubs that call exception.
TEXT ·exception(SB),NOSPLIT|NOFRAME,$0
        // Determine whether the exception occurred in kernel mode or user
        // mode, based on the flags. We expect the following stack:
        //
        //     SS         (sp+48)
        //     SP         (sp+40)
        //     FLAGS      (sp+32)
        //     CS         (sp+24)
        //     IP         (sp+16)
        //     ERROR_CODE (sp+8)
        //     VECTOR     (sp+0)
        //
        TESTL $_RFLAGS_IOPL0, 32(SP)
        JZ kernel

user:
        SWAP_GS()
        ADDQ $-8, SP                            // Adjust for flags.
        MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ).
        PUSHQ AX                                // Save user AX on stack.
        MOVQ ENTRY_KERNEL_CR3(GS), AX           // Get kernel cr3 on AX.
        WRITE_CR3()                             // Switch to kernel cr3.

        MOVQ ENTRY_CPU_SELF(GS), AX           // Load vCPU.
        MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
        REGISTERS_SAVE(AX, 0)                 // Save all except IP, FLAGS, SP, AX.
        POPQ BX                               // Restore original AX.
        MOVQ BX, PTRACE_RAX(AX)               // Save it.
        MOVQ BX, PTRACE_ORIGRAX(AX)
        MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
        MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
        MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
        MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
        MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)

        CALL ·jumpToUser(SB)

        // Restore kernel FS_BASE.
        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
        MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX

        PUSHQ BX // First argument (FS_BASE)
        CALL ·writeFS(SB)
        POPQ BX

        // Copy out and return.
        MOVQ ENTRY_CPU_SELF(GS), AX           // Load vCPU.
        MOVQ 0(SP), BX                        // Load vector.
        MOVQ 8(SP), CX                        // Load error code.
        MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Original stack (kernel version).
        MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
        MOVQ CX, CPU_ERROR_CODE(AX)           // Set error code.
        MOVQ $1, CPU_ERROR_TYPE(AX)           // Set error type to user.
        MOVQ BX, 32(SP)                       // Output vector.
        RET

kernel:
        // As per above, we can save directly.
        PUSHQ AX
        MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
        REGISTERS_SAVE(AX, CPU_REGISTERS)
        POPQ BX
        MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
        MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
        MOVQ 16(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RIP(AX)
        MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(AX)
        MOVQ 40(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RSP(AX)

        // Set the error code and adjust the stack.
        MOVQ 8(SP), BX                      // Load the error code.
        MOVQ BX, CPU_ERROR_CODE(AX)         // Copy out to the CPU.
        MOVQ 0(SP), BX                      // Load the vector.
        MOVQ BX, CPU_VECTOR(AX)             // Copy out to the CPU.
        BYTE $0x0f; BYTE $0x20; BYTE $0xd3; // MOV CR2, RBX
        MOVQ BX, CPU_FAULT_ADDR(AX)
        MOVQ $0, CPU_ERROR_TYPE(AX)         // Set error type to kernel.

        // Save floating point state. CPU.floatingPointState is a slice, so the
        // first word of CPU.floatingPointState is a pointer to the destination
        // array.
        MOVQ CPU_FPU_STATE(AX), DI
        MOVB CPU_HAS_XSAVE(AX), BX
        MOVB CPU_HAS_XSAVEOPT(AX), CX
        TESTB BX, BX
        JZ no_xsave
        // Use xsave/xsaveopt to save all extended state.
        // We save everything unconditionally by setting RFBM to all 1's.
        MOVL $0xffffffff, AX
        MOVL $0xffffffff, DX
        TESTB CX, CX
        JZ no_xsaveopt
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
        JMP fpsave_done
no_xsaveopt:
        BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
        JMP fpsave_done
no_xsave:
        FXSAVE64 0(DI)
fpsave_done:

        // Call the exception trampoline.
        MOVQ 0(SP), BX              // BX contains the vector.
        LOAD_KERNEL_STACK(GS)
        MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
        PUSHQ BX                    // Second argument (vector).
        PUSHQ AX                    // First argument (vCPU).
        CALL ·kernelException(SB)   // Call the trampoline.
        POPQ BX                     // Pop vector.
        POPQ AX                     // Pop vCPU.

        // We only trigger a bluepill entry in the bluepill function, and can
        // therefore be guaranteed that there is no floating point state to be
        // loaded on resuming from halt.
        JMP ·resume(SB)
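
// The stubs below pair each vector's handler with an addrOf* helper (via
// ADDR_OF_FUNC), since Go code cannot take the address of an assembly
// function directly; the returned addresses are what get installed when
// constructing the IDT.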

#define EXCEPTION_WITH_ERROR(value, symbol, addr) \
ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT|NOFRAME,$0; \
        PUSHQ $value; \
        JMP ·exception(SB);

#define EXCEPTION_WITHOUT_ERROR(value, symbol, addr) \
ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT|NOFRAME,$0; \
        PUSHQ $0x0; \
        PUSHQ $value; \
        JMP ·exception(SB);

EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB), ·addrOfDivideByZero(SB))
EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB), ·addrOfDebug(SB))
EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB), ·addrOfNMI(SB))
EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB), ·addrOfBreakpoint(SB))
EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB), ·addrOfOverflow(SB))
EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB), ·addrOfBoundRangeExceeded(SB))
EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB), ·addrOfInvalidOpcode(SB))
EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB), ·addrOfDeviceNotAvailable(SB))
EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB), ·addrOfDoubleFault(SB))
EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB), ·addrOfCoprocessorSegmentOverrun(SB))
EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB), ·addrOfInvalidTSS(SB))
EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB), ·addrOfSegmentNotPresent(SB))
EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB), ·addrOfStackSegmentFault(SB))
EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB), ·addrOfGeneralProtectionFault(SB))
EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB), ·addrOfPageFault(SB))
EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB), ·addrOfX87FloatingPointException(SB))
EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB), ·addrOfAlignmentCheck(SB))
EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB), ·addrOfMachineCheck(SB))
EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB), ·addrOfSimdFloatingPointException(SB))
EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB), ·addrOfVirtualizationException(SB))
EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB), ·addrOfSecurityException(SB))
EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB), ·addrOfSyscallInt80(SB))