github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/kvm/bluepill_amd64.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build amd64 16 // +build amd64 17 18 package kvm 19 20 import ( 21 "github.com/MerlinKodo/gvisor/pkg/ring0" 22 "github.com/MerlinKodo/gvisor/pkg/sentry/arch" 23 "golang.org/x/sys/unix" 24 ) 25 26 var ( 27 // The action for bluepillSignal is changed by sigaction(). 28 bluepillSignal = unix.SIGSEGV 29 ) 30 31 // bluepillArchEnter is called during bluepillEnter. 32 // 33 //go:nosplit 34 func bluepillArchEnter(context *arch.SignalContext64) *vCPU { 35 c := vCPUPtr(uintptr(context.Rax)) 36 regs := c.CPU.Registers() 37 regs.R8 = context.R8 38 regs.R9 = context.R9 39 regs.R10 = context.R10 40 regs.R11 = context.R11 41 regs.R12 = context.R12 42 regs.R13 = context.R13 43 regs.R14 = context.R14 44 regs.R15 = context.R15 45 regs.Rdi = context.Rdi 46 regs.Rsi = context.Rsi 47 regs.Rbp = context.Rbp 48 regs.Rbx = context.Rbx 49 regs.Rdx = context.Rdx 50 regs.Rax = context.Rax 51 regs.Rcx = context.Rcx 52 regs.Rsp = context.Rsp 53 regs.Rip = context.Rip 54 regs.Eflags = context.Eflags 55 regs.Eflags &^= uint64(ring0.KernelFlagsClear) 56 regs.Eflags |= ring0.KernelFlagsSet 57 regs.Cs = uint64(ring0.Kcode) 58 regs.Ds = uint64(ring0.Udata) 59 regs.Es = uint64(ring0.Udata) 60 regs.Ss = uint64(ring0.Kdata) 61 return c 62 } 63 64 // hltSanityCheck verifies the current state to detect obvious corruption. 65 // 66 //go:nosplit 67 func (c *vCPU) hltSanityCheck() { 68 vector := c.CPU.Vector() 69 switch ring0.Vector(vector) { 70 case ring0.PageFault: 71 if c.CPU.FaultAddr() < ring0.KernelStartAddress { 72 return 73 } 74 case ring0.DoubleFault: 75 case ring0.GeneralProtectionFault: 76 case ring0.InvalidOpcode: 77 case ring0.MachineCheck: 78 case ring0.VirtualizationException: 79 default: 80 return 81 } 82 83 printHex([]byte("Vector = "), uint64(c.CPU.Vector())) 84 printHex([]byte("FaultAddr = "), uint64(c.CPU.FaultAddr())) 85 printHex([]byte("rip = "), uint64(c.CPU.Registers().Rip)) 86 printHex([]byte("rsp = "), uint64(c.CPU.Registers().Rsp)) 87 throw("fault") 88 } 89 90 // KernelSyscall handles kernel syscalls. 91 // 92 // +checkescape:all 93 // 94 //go:nosplit 95 func (c *vCPU) KernelSyscall() { 96 regs := c.Registers() 97 if regs.Rax != ^uint64(0) { 98 regs.Rip -= 2 // Rewind. 99 } 100 // N.B. Since KernelSyscall is called when the kernel makes a syscall, 101 // FS_BASE is already set for correct execution of this function. 102 // 103 // Refresher on syscall/exception handling: 104 // 1. When the sentry is in guest mode and makes a syscall, it goes to 105 // sysenter(), which saves the register state (including RIP of SYSCALL 106 // instruction) to vCPU.registers. 107 // 2. It then calls KernelSyscall, which rewinds the IP and executes 108 // HLT. 109 // 3. HLT does a VM-exit to bluepillHandler, which returns from the 110 // signal handler using vCPU.registers, directly to the SYSCALL 111 // instruction. 112 // 4. Later, when we want to re-use the vCPU (perhaps on a different 113 // host thread), we set the new thread's registers in vCPU.registers 114 // (as opposed to setting the KVM registers with KVM_SET_REGS). 115 // 5. KVM_RUN thus enters the guest with the old register state, 116 // immediately following the HLT instruction, returning here. 117 // 6. We then restore FS_BASE and the full registers from vCPU.register 118 // to return from sysenter() back to the desired bluepill point from 119 // the host. 120 ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. 121 } 122 123 // KernelException handles kernel exceptions. 124 // 125 // +checkescape:all 126 // 127 //go:nosplit 128 func (c *vCPU) KernelException(vector ring0.Vector) { 129 regs := c.Registers() 130 if vector == ring0.Vector(bounce) { 131 // This go-routine was saved in hr3 and resumed in gr0 with the 132 // userspace flags. Let's adjust flags and skip the interrupt. 133 regs.Eflags &^= uint64(ring0.KernelFlagsClear) 134 regs.Eflags |= ring0.KernelFlagsSet 135 return 136 } 137 // See above. 138 ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. 139 } 140 141 // bluepillArchExit is called during bluepillEnter. 142 // 143 //go:nosplit 144 func bluepillArchExit(c *vCPU, context *arch.SignalContext64) { 145 regs := c.CPU.Registers() 146 context.R8 = regs.R8 147 context.R9 = regs.R9 148 context.R10 = regs.R10 149 context.R11 = regs.R11 150 context.R12 = regs.R12 151 context.R13 = regs.R13 152 context.R14 = regs.R14 153 context.R15 = regs.R15 154 context.Rdi = regs.Rdi 155 context.Rsi = regs.Rsi 156 context.Rbp = regs.Rbp 157 context.Rbx = regs.Rbx 158 context.Rdx = regs.Rdx 159 context.Rax = regs.Rax 160 context.Rcx = regs.Rcx 161 context.Rsp = regs.Rsp 162 context.Rip = regs.Rip 163 context.Eflags = regs.Eflags 164 165 c.FloatingPointState().PrepForHostSigframe() 166 // Set the context pointer to the saved floating point state. This is 167 // where the guest data has been serialized, the kernel will restore 168 // from this new pointer value. 169 context.Fpstate = uint64(uintptrValue(c.FloatingPointState().BytePointer())) // escapes: no. 170 }