github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/kvm/bluepill_fault.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kvm 16 17 import ( 18 "sync/atomic" 19 20 "github.com/MerlinKodo/gvisor/pkg/hostarch" 21 "golang.org/x/sys/unix" 22 ) 23 24 const ( 25 // faultBlockSize is the size used for servicing memory faults. 26 // 27 // This should be large enough to avoid frequent faults and avoid using 28 // all available KVM slots (~512), but small enough that KVM does not 29 // complain about slot sizes (~4GB). See handleBluepillFault for how 30 // this block is used. 31 faultBlockSize = 2 << 30 32 33 // faultBlockMask is the mask for the fault blocks. 34 // 35 // This must be typed to avoid overflow complaints (ugh). 36 faultBlockMask = ^uintptr(faultBlockSize - 1) 37 ) 38 39 // yield yields the CPU. 40 // 41 //go:nosplit 42 func yield() { 43 unix.RawSyscall(unix.SYS_SCHED_YIELD, 0, 0, 0) 44 } 45 46 // calculateBluepillFault calculates the fault address range. 47 // 48 //go:nosplit 49 func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, pr *physicalRegion) { 50 alignedPhysical := physical &^ uintptr(hostarch.PageSize-1) 51 for i, pr := range phyRegions { 52 end := pr.physical + pr.length 53 if physical < pr.physical || physical >= end { 54 continue 55 } 56 57 // Adjust the block to match our size. 58 physicalStart = pr.physical + (alignedPhysical-pr.physical)&faultBlockMask 59 virtualStart = pr.virtual + (physicalStart - pr.physical) 60 physicalEnd := physicalStart + faultBlockSize 61 if physicalEnd > end { 62 physicalEnd = end 63 } 64 length = physicalEnd - physicalStart 65 return virtualStart, physicalStart, length, &phyRegions[i] 66 } 67 68 return 0, 0, 0, nil 69 } 70 71 // handleBluepillFault handles a physical fault. 72 // 73 // The corresponding virtual address is returned. This may throw on error. 74 // 75 //go:nosplit 76 func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegion) (uintptr, bool) { 77 // Paging fault: we need to map the underlying physical pages for this 78 // fault. This all has to be done in this function because we're in a 79 // signal handler context. (We can't call any functions that might 80 // split the stack.) 81 virtualStart, physicalStart, length, pr := calculateBluepillFault(physical, phyRegions) 82 if pr == nil { 83 return 0, false 84 } 85 86 // Set the KVM slot. 87 // 88 // First, we need to acquire the exclusive right to set a slot. See 89 // machine.nextSlot for information about the protocol. 90 slot := m.nextSlot.Swap(^uint32(0)) 91 for slot == ^uint32(0) { 92 yield() // Race with another call. 93 slot = m.nextSlot.Swap(^uint32(0)) 94 } 95 flags := _KVM_MEM_FLAGS_NONE 96 if pr.readOnly { 97 flags |= _KVM_MEM_READONLY 98 } 99 errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags) 100 if errno == 0 { 101 // Store the physical address in the slot. This is used to 102 // avoid calls to handleBluepillFault in the future (see 103 // machine.mapPhysical). 104 atomic.StoreUintptr(&m.usedSlots[slot], physicalStart) 105 // Successfully added region; we can increment nextSlot and 106 // allow another set to proceed here. 107 m.nextSlot.Store(slot + 1) 108 return virtualStart + (physical - physicalStart), true 109 } 110 111 // Release our slot (still available). 112 m.nextSlot.Store(slot) 113 114 switch errno { 115 case unix.EEXIST: 116 // The region already exists. It's possible that we raced with 117 // another vCPU here. We just revert nextSlot and return true, 118 // because this must have been satisfied by some other vCPU. 119 return virtualStart + (physical - physicalStart), true 120 case unix.EINVAL: 121 throw("set memory region failed; out of slots") 122 case unix.ENOMEM: 123 throw("set memory region failed: out of memory") 124 case unix.EFAULT: 125 throw("set memory region failed: invalid physical range") 126 default: 127 throw("set memory region failed: unknown reason") 128 } 129 130 panic("unreachable") 131 }