github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/kvm/bluepill_unsafe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build go1.18
    16  // +build go1.18
    17  
    18  // //go:linkname directives type-checked by checklinkname. Any other
    19  // non-linkname assumptions outside the Go 1 compatibility guarantee should
    20  // have an accompanied vet check or version guard build tag.
    21  
    22  package kvm
    23  
    24  import (
    25  	"unsafe"
    26  
    27  	"golang.org/x/sys/unix"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
    29  )
    30  
// throw is declared without a body here; the go:linkname directive below
// binds it to runtime.throw. It is called in this file when an unexpected
// state or error is observed.
//
//go:linkname throw runtime.throw
func throw(s string)
    33  
    34  // vCPUPtr returns a CPU for the given address.
    35  //
    36  //go:nosplit
    37  func vCPUPtr(addr uintptr) *vCPU {
    38  	return (*vCPU)(unsafe.Pointer(addr))
    39  }
    40  
    41  // bytePtr returns a bytePtr for the given address.
    42  //
    43  //go:nosplit
    44  func bytePtr(addr uintptr) *byte {
    45  	return (*byte)(unsafe.Pointer(addr))
    46  }
    47  
    48  // uintptrValue returns a uintptr for the given address.
    49  //
    50  //go:nosplit
    51  func uintptrValue(addr *byte) uintptr {
    52  	return (uintptr)(unsafe.Pointer(addr))
    53  }
    54  
    55  // bluepillArchContext returns the UContext64.
    56  //
    57  //go:nosplit
    58  func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
    59  	return &((*arch.UContext64)(context).MContext)
    60  }
    61  
    62  // bluepillHandleHlt is reponsible for handling VM-Exit.
    63  //
    64  //go:nosplit
    65  func bluepillGuestExit(c *vCPU, context unsafe.Pointer) {
    66  	// Increment our counter.
    67  	c.guestExits.Add(1)
    68  
    69  	// Copy out registers.
    70  	bluepillArchExit(c, bluepillArchContext(context))
    71  
    72  	// Return to the vCPUReady state; notify any waiters.
    73  	user := c.state.Load() & vCPUUser
    74  	switch c.state.Swap(user) {
    75  	case user | vCPUGuest: // Expected case.
    76  	case user | vCPUGuest | vCPUWaiter:
    77  		c.notify()
    78  	default:
    79  		throw("invalid state")
    80  	}
    81  }
    82  
    83  var hexSyms = []byte("0123456789abcdef")
    84  
    85  //go:nosplit
    86  func printHex(title []byte, val uint64) {
    87  	var str [18]byte
    88  	for i := 0; i < 16; i++ {
    89  		str[16-i] = hexSyms[val&0xf]
    90  		val = val >> 4
    91  	}
    92  	str[0] = ' '
    93  	str[17] = '\n'
    94  	unix.RawSyscall(unix.SYS_WRITE, uintptr(unix.Stderr), uintptr(unsafe.Pointer(&title[0])), uintptr(len(title)))
    95  	unix.RawSyscall(unix.SYS_WRITE, uintptr(unix.Stderr), uintptr(unsafe.Pointer(&str)), 18)
    96  }
    97  
// bluepillHandler is called from the signal stub.
//
// It marks the vCPU returned by bluepillArchEnter as being in guest mode and
// then loops on the KVM run ioctl, dispatching on the ioctl's errno and on
// the reported exit reason, until one of the cases returns.
//
// The world may be stopped while this is executing, and it executes on the
// signal stack. It should only execute raw system calls and functions that are
// explicitly marked go:nosplit.
//
// Ideally, this function should switch to gsignal, as runtime.sigtramp does,
// but that is tedious given all the runtime internals. That said, using
// gsignal inside a signal handler is not _required_, provided we avoid stack
// splits and allocations. Note that calling any splittable function here will
// be flaky; if the signal stack is below the G stack then we will trigger a
// split and crash. If above, we won't trigger a split.
//
// +checkescape:all
//
//go:nosplit
func bluepillHandler(context unsafe.Pointer) {
	// Sanitize the registers; interrupts must always be disabled.
	c := bluepillArchEnter(bluepillArchContext(context))

	// Mark this as guest mode.
	switch c.state.Swap(vCPUGuest | vCPUUser) {
	case vCPUUser: // Expected case.
	case vCPUUser | vCPUWaiter:
		// A waiter was registered on the previous state; notify it.
		c.notify()
	default:
		throw("invalid state")
	}

	// Each iteration runs the vCPU once. A `continue` reruns it; falling
	// out of the exit-reason switch below without returning also reruns it.
	for {
		hostExitCounter.Increment()
		_, _, errno := unix.RawSyscall(unix.SYS_IOCTL, uintptr(c.fd), _KVM_RUN, 0) // escapes: no.
		switch errno {
		case 0: // Expected case.
		case unix.EINTR:
			interruptCounter.Increment()
			// First, we process whatever pending signal
			// interrupted KVM. Since we're in a signal handler
			// currently, all signals are masked and the signal
			// must have been delivered directly to this thread.
			//
			// The timeout is the zero Timespec, so this call is
			// not expected to block waiting for a signal.
			timeout := unix.Timespec{}
			sig, _, errno := unix.RawSyscall6( // escapes: no.
				unix.SYS_RT_SIGTIMEDWAIT,
				uintptr(unsafe.Pointer(&bounceSignalMask)),
				0,                                 // siginfo.
				uintptr(unsafe.Pointer(&timeout)), // timeout.
				8,                                 // sigset size.
				0, 0)
			if errno == unix.EAGAIN {
				// No signal from bounceSignalMask was consumed;
				// rerun the vCPU.
				continue
			}
			if errno != 0 {
				throw("error waiting for pending signal")
			}
			if sig != uintptr(bounceSignal) {
				throw("unexpected signal")
			}

			// Check whether the current state of the vCPU is ready
			// for interrupt injection. Because we don't have a
			// PIC, we can't inject an interrupt while they are
			// masked. We need to request a window if it's not
			// ready.
			if bluepillReadyStopGuest(c) {
				// Force injection below; the vCPU is ready.
				c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN
			} else {
				c.runData.requestInterruptWindow = 1
				continue // Rerun vCPU.
			}
		case unix.EFAULT:
			// If a fault is not serviceable due to the host
			// backing pages having page permissions, instead of an
			// MMIO exit we receive EFAULT from the run ioctl. We
			// always inject an NMI here since we may be in kernel
			// mode and have interrupts disabled.
			bluepillSigBus(c)
			continue // Rerun vCPU.
		case unix.ENOSYS:
			// Hand ENOSYS to its dedicated handler, then rerun the vCPU.
			bluepillHandleEnosys(c)
			continue
		default:
			throw("run failed")
		}

		// Dispatch on the exit reason recorded in the run data. Cases
		// that call c.die or bluepillGuestExit return from the handler.
		switch c.runData.exitReason {
		case _KVM_EXIT_EXCEPTION:
			c.die(bluepillArchContext(context), "exception")
			return
		case _KVM_EXIT_IO:
			c.die(bluepillArchContext(context), "I/O")
			return
		case _KVM_EXIT_INTERNAL_ERROR:
			// An internal error is typically thrown when emulation
			// fails. This can occur via the MMIO path below (and
			// it might fail because we have multiple regions that
			// are not mapped). We would actually prefer that no
			// emulation occur, and don't mind at all if it fails.
			//
			// Nothing is done here; the loop simply reruns the vCPU.
		case _KVM_EXIT_HYPERCALL:
			c.die(bluepillArchContext(context), "hypercall")
			return
		case _KVM_EXIT_DEBUG:
			c.die(bluepillArchContext(context), "debug")
			return
		case _KVM_EXIT_HLT:
			c.hltSanityCheck()
			bluepillGuestExit(c, context)
			return
		case _KVM_EXIT_MMIO:
			// The first word of the run data's payload is treated as
			// the physical address, which is mapped to a hypercall ID.
			physical := uintptr(c.runData.data[0])
			if getHypercallID(physical) == _KVM_HYPERCALL_VMEXIT {
				bluepillGuestExit(c, context)
				return
			}

			// Any other MMIO exit is fatal for this vCPU.
			c.die(bluepillArchContext(context), "exit_mmio")
			return
		case _KVM_EXIT_IRQ_WINDOW_OPEN:
			// Reached either from a real exit or from the EINTR
			// path above, which sets this exit reason itself. No
			// return here, so the vCPU is rerun afterwards.
			bluepillStopGuest(c)
		case _KVM_EXIT_SHUTDOWN:
			c.die(bluepillArchContext(context), "shutdown")
			return
		case _KVM_EXIT_FAIL_ENTRY:
			c.die(bluepillArchContext(context), "entry failed")
			return
		default:
			// Any other exit reason goes to the architecture-specific
			// handler, after which the signal handler returns.
			bluepillArchHandleExit(c, context)
			return
		}
	}
}