github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/kvm/bluepill_unsafe.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build go1.12
// +build !go1.18

// Check go:linkname function signatures when updating Go version.

package kvm

import (
	"sync/atomic"
	"unsafe"

	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"golang.org/x/sys/unix"
)

//go:linkname throw runtime.throw
func throw(string)

// vCPUPtr returns a vCPU for the given address.
//
//go:nosplit
func vCPUPtr(addr uintptr) *vCPU {
	return (*vCPU)(unsafe.Pointer(addr))
}

// bytePtr returns a byte pointer for the given address.
//
//go:nosplit
func bytePtr(addr uintptr) *byte {
	return (*byte)(unsafe.Pointer(addr))
}

// uintptrValue returns a uintptr for the given address.
//
//go:nosplit
func uintptrValue(addr *byte) uintptr {
	return (uintptr)(unsafe.Pointer(addr))
}

// bluepillArchContext returns the UContext64.
//
//go:nosplit
func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
	return &((*arch.UContext64)(context).MContext)
}

// bluepillGuestExit is responsible for handling a VM-exit.
//
//go:nosplit
func bluepillGuestExit(c *vCPU, context unsafe.Pointer) {
	// Increment our counter.
	atomic.AddUint64(&c.guestExits, 1)

	// Copy out registers.
	bluepillArchExit(c, bluepillArchContext(context))

	// Return to the vCPUReady state; notify any waiters.
	user := atomic.LoadUint32(&c.state) & vCPUUser
	switch atomic.SwapUint32(&c.state, user) {
	case user | vCPUGuest: // Expected case.
	case user | vCPUGuest | vCPUWaiter:
		c.notify()
	default:
		throw("invalid state")
	}
}

// bluepillHandler is called from the signal stub.
//
// The world may be stopped while this is executing, and it executes on the
// signal stack. It should only execute raw system calls and functions that are
// explicitly marked go:nosplit.
//
// +checkescape:all
//
//go:nosplit
func bluepillHandler(context unsafe.Pointer) {
	// Sanitize the registers; interrupts must always be disabled.
	c := bluepillArchEnter(bluepillArchContext(context))

	// Mark this as guest mode.
	switch atomic.SwapUint32(&c.state, vCPUGuest|vCPUUser) {
	case vCPUUser: // Expected case.
	case vCPUUser | vCPUWaiter:
		c.notify()
	default:
		throw("invalid state")
	}

	for {
		_, _, errno := unix.RawSyscall(unix.SYS_IOCTL, uintptr(c.fd), _KVM_RUN, 0) // escapes: no.
		switch errno {
		case 0: // Expected case.
		case unix.EINTR:
			// First, we process whatever pending signal
			// interrupted KVM. Since we're in a signal handler
			// currently, all signals are masked and the signal
			// must have been delivered directly to this thread.
			timeout := unix.Timespec{}
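			// The zero timeout below makes rt_sigtimedwait a
			// non-blocking poll: EAGAIN means the bounce signal
			// has not actually been queued, so we simply re-enter
			// the guest.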
			sig, _, errno := unix.RawSyscall6( // escapes: no.
				unix.SYS_RT_SIGTIMEDWAIT,
				uintptr(unsafe.Pointer(&bounceSignalMask)),
				0,                                 // siginfo.
				uintptr(unsafe.Pointer(&timeout)), // timeout.
				8,                                 // sigset size.
				0, 0)
			if errno == unix.EAGAIN {
				continue
			}
			if errno != 0 {
				throw("error waiting for pending signal")
			}
			if sig != uintptr(bounceSignal) {
				throw("unexpected signal")
			}

			// Check whether the current state of the vCPU is ready
			// for interrupt injection. Because we don't have a
			// PIC, we can't inject an interrupt while interrupts
			// are masked. We need to request a window if it's not
			// ready.
			if bluepillReadyStopGuest(c) {
				// Force injection below; the vCPU is ready.
				c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN
			} else {
				c.runData.requestInterruptWindow = 1
				continue // Rerun vCPU.
			}
		case unix.EFAULT:
			// If a fault cannot be serviced because of the host
			// backing pages' permissions, we receive EFAULT from
			// the run ioctl instead of an MMIO exit. We always
			// inject an NMI here since we may be in kernel mode
			// and have interrupts disabled.
			bluepillSigBus(c)
			continue // Rerun vCPU.
		case unix.ENOSYS:
			bluepillHandleEnosys(c)
			continue
		default:
			throw("run failed")
		}

		switch c.runData.exitReason {
		case _KVM_EXIT_EXCEPTION:
			c.die(bluepillArchContext(context), "exception")
			return
		case _KVM_EXIT_IO:
			c.die(bluepillArchContext(context), "I/O")
			return
		case _KVM_EXIT_INTERNAL_ERROR:
			// An internal error is typically thrown when emulation
			// fails. This can occur via the MMIO path below (and
			// it might fail because we have multiple regions that
			// are not mapped). We would actually prefer that no
			// emulation occur, and don't mind at all if it fails.
		case _KVM_EXIT_HYPERCALL:
			c.die(bluepillArchContext(context), "hypercall")
			return
		case _KVM_EXIT_DEBUG:
			c.die(bluepillArchContext(context), "debug")
			return
		case _KVM_EXIT_HLT:
			bluepillGuestExit(c, context)
			return
		case _KVM_EXIT_MMIO:
			// For MMIO, the physical address is the first data item.
			physical := uintptr(c.runData.data[0])
			if getHypercallID(physical) == _KVM_HYPERCALL_VMEXIT {
				bluepillGuestExit(c, context)
				return
			}

			// Increment the fault count.
			atomic.AddUint32(&c.faults, 1)

			virtual, ok := handleBluepillFault(c.machine, physical, physicalRegions, _KVM_MEM_FLAGS_NONE)
			if !ok {
				c.die(bluepillArchContext(context), "invalid physical address")
				return
			}

			// We now need to fill in the data appropriately. KVM
			// expects us to provide the result of the given MMIO
			// operation in the runData struct. This is safe
			// because, if a fault occurs here, the same fault
			// would have occurred in guest mode. The kernel should
			// not create invalid page table mappings.
			data := (*[8]byte)(unsafe.Pointer(&c.runData.data[1]))
			length := (uintptr)((uint32)(c.runData.data[2]))
			write := (uint8)(((c.runData.data[2] >> 32) & 0xff)) != 0
			for i := uintptr(0); i < length; i++ {
				b := bytePtr(uintptr(virtual) + i)
				if write {
					// Write to the given address.
					*b = data[i]
				} else {
					// Read from the given address.
					data[i] = *b
				}
			}
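		// Either KVM reported that the interrupt window we requested
		// is now open, or injection was forced above; bluepillStopGuest
		// performs the arch-specific injection.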
		case _KVM_EXIT_IRQ_WINDOW_OPEN:
			bluepillStopGuest(c)
		case _KVM_EXIT_SHUTDOWN:
			c.die(bluepillArchContext(context), "shutdown")
			return
		case _KVM_EXIT_FAIL_ENTRY:
			c.die(bluepillArchContext(context), "entry failed")
			return
		default:
			bluepillArchHandleExit(c, context)
			return
		}
	}
}