github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/bluepill_amd64.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build amd64
    16  // +build amd64
    17  
    18  package kvm
    19  
    20  import (
    21  	"golang.org/x/sys/unix"
    22  	"github.com/metacubex/gvisor/pkg/ring0"
    23  	"github.com/metacubex/gvisor/pkg/sentry/arch"
    24  )
    25  
    26  var (
    27  	// The action for bluepillSignal is changed by sigaction().
    28  	bluepillSignal = unix.SIGSEGV
    29  )
    30  
    31  // bluepillArchEnter is called during bluepillEnter.
    32  //
    33  //go:nosplit
    34  func bluepillArchEnter(context *arch.SignalContext64) *vCPU {
    35  	c := vCPUPtr(uintptr(context.Rax))
    36  	regs := c.CPU.Registers()
    37  	regs.R8 = context.R8
    38  	regs.R9 = context.R9
    39  	regs.R10 = context.R10
    40  	regs.R11 = context.R11
    41  	regs.R12 = context.R12
    42  	regs.R13 = context.R13
    43  	regs.R14 = context.R14
    44  	regs.R15 = context.R15
    45  	regs.Rdi = context.Rdi
    46  	regs.Rsi = context.Rsi
    47  	regs.Rbp = context.Rbp
    48  	regs.Rbx = context.Rbx
    49  	regs.Rdx = context.Rdx
    50  	regs.Rax = context.Rax
    51  	regs.Rcx = context.Rcx
    52  	regs.Rsp = context.Rsp
    53  	regs.Rip = context.Rip
    54  	regs.Eflags = context.Eflags
    55  	regs.Eflags &^= uint64(ring0.KernelFlagsClear)
    56  	regs.Eflags |= ring0.KernelFlagsSet
    57  	regs.Cs = uint64(ring0.Kcode)
    58  	regs.Ds = uint64(ring0.Udata)
    59  	regs.Es = uint64(ring0.Udata)
    60  	regs.Ss = uint64(ring0.Kdata)
    61  	return c
    62  }
    63  
    64  // hltSanityCheck verifies the current state to detect obvious corruption.
    65  //
    66  //go:nosplit
    67  func (c *vCPU) hltSanityCheck() {
    68  	vector := c.CPU.Vector()
    69  	switch ring0.Vector(vector) {
    70  	case ring0.PageFault:
    71  		if c.CPU.FaultAddr() < ring0.KernelStartAddress {
    72  			return
    73  		}
    74  	case ring0.DoubleFault:
    75  	case ring0.GeneralProtectionFault:
    76  	case ring0.InvalidOpcode:
    77  	case ring0.MachineCheck:
    78  	case ring0.VirtualizationException:
    79  	default:
    80  		return
    81  	}
    82  
    83  	printHex([]byte("Vector    = "), uint64(c.CPU.Vector()))
    84  	printHex([]byte("FaultAddr = "), uint64(c.CPU.FaultAddr()))
    85  	printHex([]byte("rip       = "), uint64(c.CPU.Registers().Rip))
    86  	printHex([]byte("rsp       = "), uint64(c.CPU.Registers().Rsp))
    87  	throw("fault")
    88  }
    89  
    90  // KernelSyscall handles kernel syscalls.
    91  //
    92  // +checkescape:all
    93  //
    94  //go:nosplit
    95  func (c *vCPU) KernelSyscall() {
    96  	regs := c.Registers()
    97  	if regs.Rax != ^uint64(0) {
    98  		regs.Rip -= 2 // Rewind.
    99  	}
   100  	// N.B. Since KernelSyscall is called when the kernel makes a syscall,
   101  	// FS_BASE is already set for correct execution of this function.
   102  	//
   103  	// Refresher on syscall/exception handling:
   104  	// 1. When the sentry is in guest mode and makes a syscall, it goes to
   105  	// sysenter(), which saves the register state (including RIP of SYSCALL
   106  	// instruction) to vCPU.registers.
   107  	// 2. It then calls KernelSyscall, which rewinds the IP and executes
   108  	// HLT.
   109  	// 3. HLT does a VM-exit to bluepillHandler, which returns from the
   110  	// signal handler using vCPU.registers, directly to the SYSCALL
   111  	// instruction.
   112  	// 4. Later, when we want to re-use the vCPU (perhaps on a different
   113  	// host thread), we set the new thread's registers in vCPU.registers
   114  	// (as opposed to setting the KVM registers with KVM_SET_REGS).
   115  	// 5. KVM_RUN thus enters the guest with the old register state,
   116  	// immediately following the HLT instruction, returning here.
   117  	// 6. We then restore FS_BASE and the full registers from vCPU.register
   118  	// to return from sysenter() back to the desired bluepill point from
   119  	// the host.
   120  	ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
   121  }
   122  
   123  // KernelException handles kernel exceptions.
   124  //
   125  // +checkescape:all
   126  //
   127  //go:nosplit
   128  func (c *vCPU) KernelException(vector ring0.Vector) {
   129  	regs := c.Registers()
   130  	if vector == ring0.Vector(bounce) {
   131  		// This go-routine was saved in hr3 and resumed in gr0 with the
   132  		// userspace flags. Let's adjust flags and skip the interrupt.
   133  		regs.Eflags &^= uint64(ring0.KernelFlagsClear)
   134  		regs.Eflags |= ring0.KernelFlagsSet
   135  		return
   136  	}
   137  	// See above.
   138  	ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
   139  }
   140  
   141  // bluepillArchExit is called during bluepillEnter.
   142  //
   143  //go:nosplit
   144  func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
   145  	regs := c.CPU.Registers()
   146  	context.R8 = regs.R8
   147  	context.R9 = regs.R9
   148  	context.R10 = regs.R10
   149  	context.R11 = regs.R11
   150  	context.R12 = regs.R12
   151  	context.R13 = regs.R13
   152  	context.R14 = regs.R14
   153  	context.R15 = regs.R15
   154  	context.Rdi = regs.Rdi
   155  	context.Rsi = regs.Rsi
   156  	context.Rbp = regs.Rbp
   157  	context.Rbx = regs.Rbx
   158  	context.Rdx = regs.Rdx
   159  	context.Rax = regs.Rax
   160  	context.Rcx = regs.Rcx
   161  	context.Rsp = regs.Rsp
   162  	context.Rip = regs.Rip
   163  	context.Eflags = regs.Eflags
   164  
   165  	c.FloatingPointState().PrepForHostSigframe()
   166  	// Set the context pointer to the saved floating point state. This is
   167  	// where the guest data has been serialized, the kernel will restore
   168  	// from this new pointer value.
   169  	context.Fpstate = uint64(uintptrValue(c.FloatingPointState().BytePointer())) // escapes: no.
   170  }