github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/bluepill_fault.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kvm

import (
	"sync/atomic"

	"github.com/metacubex/gvisor/pkg/hostarch"
	"golang.org/x/sys/unix"
)

const (
	// faultBlockSize is the size used for servicing memory faults.
	//
	// This should be large enough to avoid frequent faults and avoid using
	// all available KVM slots (~512), but small enough that KVM does not
	// complain about slot sizes (~4GB). See handleBluepillFault for how
	// this block is used.
	faultBlockSize = 2 << 30

	// faultBlockMask is the mask for the fault blocks.
	//
	// This must be typed to avoid overflow complaints (ugh).
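	//
	// With faultBlockSize = 2 << 30 (2 GiB), faultBlockSize-1 has the low
	// 31 bits set, so masking with faultBlockMask rounds a value down to
	// the nearest fault-block boundary.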
	faultBlockMask = ^uintptr(faultBlockSize - 1)
)

// yield yields the CPU.
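//
// The syscall is issued through unix.RawSyscall, which does not enter the
// Go runtime scheduler, so it is suitable for the spin loop in
// handleBluepillFault below.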
//
//go:nosplit
func yield() {
	unix.RawSyscall(unix.SYS_SCHED_YIELD, 0, 0, 0)
}

// calculateBluepillFault calculates the fault address range.
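//
// It returns the virtual and physical start of the fault block containing
// physical, the usable length of that block, and a pointer to the matching
// physicalRegion; pr is nil when physical does not fall inside any known
// region.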
//
//go:nosplit
func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, pr *physicalRegion) {
	alignedPhysical := physical &^ uintptr(hostarch.PageSize-1)
	for i, pr := range phyRegions {
		end := pr.physical + pr.length
		if physical < pr.physical || physical >= end {
			continue
		}

		// Adjust the block to match our size.
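		//
		// physicalStart is the offset of alignedPhysical within the
		// region, rounded down to a fault-block boundary and added back
		// to the region base; the block is then clamped to the end of
		// the region.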
		physicalStart = pr.physical + (alignedPhysical-pr.physical)&faultBlockMask
		virtualStart = pr.virtual + (physicalStart - pr.physical)
		physicalEnd := physicalStart + faultBlockSize
		if physicalEnd > end {
			physicalEnd = end
		}
		length = physicalEnd - physicalStart
		return virtualStart, physicalStart, length, &phyRegions[i]
	}

	return 0, 0, 0, nil
}

// handleBluepillFault handles a physical fault.
//
// The corresponding virtual address is returned. This may throw on error.
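// The boolean result is false when physical does not fall inside any known
// physical region, in which case nothing is mapped.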
//
//go:nosplit
func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegion) (uintptr, bool) {
	// Paging fault: we need to map the underlying physical pages for this
	// fault. This all has to be done in this function because we're in a
	// signal handler context. (We can't call any functions that might
	// split the stack.)
	virtualStart, physicalStart, length, pr := calculateBluepillFault(physical, phyRegions)
	if pr == nil {
		return 0, false
	}

	// Set the KVM slot.
	//
	// First, we need to acquire the exclusive right to set a slot.  See
	// machine.nextSlot for information about the protocol.
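	//
	// Swapping in ^uint32(0) acts as a busy sentinel: the caller that
	// receives a real slot number owns the right to program it, while any
	// caller that observes the sentinel spins (yielding the CPU) until the
	// owner stores a valid value back into nextSlot.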
	slot := m.nextSlot.Swap(^uint32(0))
	for slot == ^uint32(0) {
		yield() // Race with another call.
		slot = m.nextSlot.Swap(^uint32(0))
	}
	flags := _KVM_MEM_FLAGS_NONE
	if pr.readOnly {
		flags |= _KVM_MEM_READONLY
	}
	errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags)
	if errno == 0 {
		// Store the physical address in the slot. This is used to
		// avoid calls to handleBluepillFault in the future (see
		// machine.mapPhysical).
		atomic.StoreUintptr(&m.usedSlots[slot], physicalStart)
		// Successfully added region; we can increment nextSlot and
		// allow another set to proceed here.
		m.nextSlot.Store(slot + 1)
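		// The faulting physical address now lies inside the newly mapped
		// block; translate it to the corresponding virtual address.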
		return virtualStart + (physical - physicalStart), true
	}

	// Release our slot (still available).
	m.nextSlot.Store(slot)

	switch errno {
	case unix.EEXIST:
		// The region already exists. It's possible that we raced with
		// another vCPU here; nextSlot has already been released above, so
		// just return true, because the mapping must have been installed
		// by some other vCPU.
		return virtualStart + (physical - physicalStart), true
	case unix.EINVAL:
		throw("set memory region failed: out of slots")
	case unix.ENOMEM:
		throw("set memory region failed: out of memory")
	case unix.EFAULT:
		throw("set memory region failed: invalid physical range")
	default:
		throw("set memory region failed: unknown reason")
	}

	panic("unreachable")
}