github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/machine_arm64.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build arm64
// +build arm64

package kvm

import (
	"fmt"
	"runtime"

	"github.com/metacubex/gvisor/pkg/abi/linux"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/ring0"
	"github.com/metacubex/gvisor/pkg/ring0/pagetables"
	"github.com/metacubex/gvisor/pkg/sentry/platform"
	"golang.org/x/sys/unix"
)

type vCPUArchState struct {
	// PCIDs is the set of PCIDs for this vCPU.
	//
	// This starts above fixedKernelPCID.
	PCIDs *pagetables.PCIDs
}

const (
	// fixedKernelPCID is a fixed kernel PCID used for the kernel page
	// tables. We must start allocating user PCIDs above this in order to
	// avoid any conflict (see below).
	fixedKernelPCID = 1

	// poolPCIDs is the number of PCIDs to record in the database. As this
	// grows, assignment can take longer, since it is a simple linear scan.
	// Beyond a relatively small number, there are likely few performance
	// benefits, since the TLB has likely long since lost any translations
	// from more than a few PCIDs past.
	poolPCIDs = 128
)

func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
	applyPhysicalRegions(func(pr physicalRegion) bool {
		pageTable.Map(
			hostarch.Addr(ring0.KernelStartAddress|pr.virtual),
			pr.length,
			pagetables.MapOpts{AccessType: hostarch.AnyAccess, Global: true},
			pr.physical)

		return true // Keep iterating.
	})
}

// archPhysicalRegions fills readOnlyGuestRegions and allocates separate
// physical regions from them.
func archPhysicalRegions(physicalRegions []physicalRegion) []physicalRegion {
	rdRegions := []virtualRegion{}
	if err := applyVirtualRegions(func(vr virtualRegion) {
		if excludeVirtualRegion(vr) {
			return // Skip this region.
		}
		// Skip PROT_NONE mappings. The Go runtime uses them as
		// placeholders for future read-write mappings.
		if !vr.accessType.Write && vr.accessType.Read {
			rdRegions = append(rdRegions, vr)
		}
	}); err != nil {
		panic(fmt.Sprintf("error parsing /proc/self/maps: %v", err))
	}

	// Add an unreachable region.
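	// The sentinel at the top of the address space guarantees that the
	// merge loop below always has a next read-only region to compare
	// against, so rdRegions[i] can never be indexed out of bounds.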
	rdRegions = append(rdRegions, virtualRegion{
		region: region{
			virtual: 0xffffffffffffffff,
			length:  0,
		},
	})

	var regions []physicalRegion
	addValidRegion := func(r *physicalRegion, virtual, length uintptr, readOnly bool) {
		if length == 0 {
			return
		}
		regions = append(regions, physicalRegion{
			region: region{
				virtual: virtual,
				length:  length,
			},
			physical: r.physical + (virtual - r.virtual),
			readOnly: readOnly,
		})
	}
	i := 0
	for _, pr := range physicalRegions {
		start := pr.virtual
		end := pr.virtual + pr.length
		for start < end {
			rdRegion := rdRegions[i].region
			rdStart := rdRegion.virtual
			rdEnd := rdRegion.virtual + rdRegion.length
			if rdEnd <= start {
				i++
				continue
			}
			if rdStart > start {
				newEnd := rdStart
				if end < rdStart {
					newEnd = end
				}
				addValidRegion(&pr, start, newEnd-start, false)
				start = rdStart
				continue
			}
			if rdEnd < end {
				addValidRegion(&pr, start, rdEnd-start, true)
				start = rdEnd
				continue
			}
			addValidRegion(&pr, start, end-start, start >= rdStart && end <= rdEnd)
			start = end
		}
	}

	return regions
}

// nonCanonical generates the fault return for a non-canonical address.
//
//go:nosplit
func nonCanonical(addr uint64, signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
	*info = linux.SignalInfo{
		Signo: signal,
		Code:  linux.SI_KERNEL,
	}
	info.SetAddr(addr) // Include address.
	return hostarch.NoAccess, platform.ErrContextSignal
}

// isInstructionAbort returns true if the fault is an instruction abort.
//
//go:nosplit
func isInstructionAbort(code uint64) bool {
	value := (code & _ESR_ELx_EC_MASK) >> _ESR_ELx_EC_SHIFT
	return value == _ESR_ELx_EC_IABT_LOW
}

// isWriteFault returns whether the fault is a write fault.
//
//go:nosplit
func isWriteFault(code uint64) bool {
	if isInstructionAbort(code) {
		return false
	}

	return (code & _ESR_ELx_WNR) != 0
}

// fault generates an appropriate fault return.
//
//go:nosplit
func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
	bluepill(c) // Probably no-op, but may not be.
	faultAddr := c.FaultAddr()
	code, user := c.ErrorCode()
	if !user {
		// The last fault serviced by this CPU was not a user
		// fault, so we can't reliably trust the faultAddr or
		// the code provided here. We need to re-execute.
		return hostarch.NoAccess, platform.ErrContextInterrupt
	}

	// Reset the pointed-to SignalInfo.
	*info = linux.SignalInfo{Signo: signal}
	info.SetAddr(uint64(faultAddr))
	accessType := hostarch.AccessType{}
	if signal == int32(unix.SIGSEGV) {
		accessType = hostarch.AccessType{
			Read:    !isWriteFault(uint64(code)),
			Write:   isWriteFault(uint64(code)),
			Execute: isInstructionAbort(uint64(code)),
		}
	}

	ret := code & _ESR_ELx_FSC
	switch ret {
	case _ESR_SEGV_MAPERR_L0, _ESR_SEGV_MAPERR_L1, _ESR_SEGV_MAPERR_L2, _ESR_SEGV_MAPERR_L3:
		info.Code = 1 // SEGV_MAPERR.
	case _ESR_SEGV_ACCERR_L1, _ESR_SEGV_ACCERR_L2, _ESR_SEGV_ACCERR_L3, _ESR_SEGV_PEMERR_L1, _ESR_SEGV_PEMERR_L2, _ESR_SEGV_PEMERR_L3:
		info.Code = 2 // SEGV_ACCERR.
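	// Conservatively report any unrecognized fault status code as an
	// access error.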
	default:
		info.Code = 2 // SEGV_ACCERR.
	}

	return accessType, platform.ErrContextSignal
}

// getMaxVCPU gets the maximum number of vCPUs.
func (m *machine) getMaxVCPU() {
	rmaxVCPUs := runtime.NumCPU()
	smaxVCPUs, _, errno := unix.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
	// Compare the maximum vCPU counts reported by the runtime and by the
	// KVM capability check, and use the smaller one.
	if errno != 0 {
		m.maxVCPUs = rmaxVCPUs
	} else {
		if rmaxVCPUs < int(smaxVCPUs) {
			m.maxVCPUs = rmaxVCPUs
		} else {
			m.maxVCPUs = int(smaxVCPUs)
		}
	}
}