github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/kvm/machine_arm64.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build arm64

package kvm

import (
	"runtime"
	"sync/atomic"

	"golang.org/x/sys/unix"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/ring0"
	"github.com/SagerNet/gvisor/pkg/ring0/pagetables"
	"github.com/SagerNet/gvisor/pkg/sentry/arch/fpu"
	"github.com/SagerNet/gvisor/pkg/sentry/platform"
)

type machineArchState struct {
	// initialvCPUs is the set of machine vCPUs that have been initialized
	// but are not yet in use.
	initialvCPUs map[int]*vCPU
}

type vCPUArchState struct {
	// PCIDs is the set of PCIDs for this vCPU.
	//
	// This starts above fixedKernelPCID.
	PCIDs *pagetables.PCIDs

	// floatingPointState is the floating point state buffer used in guest
	// to host transitions. See usage in bluepill_arm64.go.
	floatingPointState fpu.State
}

const (
	// fixedKernelPCID is a fixed kernel PCID used for the kernel page
	// tables. We must start allocating user PCIDs above this in order to
	// avoid any conflict (see below).
	fixedKernelPCID = 1

	// poolPCIDs is the number of PCIDs to record in the database. As this
	// grows, assignment can take longer, since it is a simple linear scan.
	// Beyond a relatively small number, there are likely few performance
	// benefits, since the TLB has likely long since lost any translations
	// from more than a few PCIDs past.
	poolPCIDs = 128
)

// mapUpperHalf maps every physical region into the upper (kernel) half of
// the address space by aliasing it at ring0.KernelStartAddress.
func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
	applyPhysicalRegions(func(pr physicalRegion) bool {
		pageTable.Map(
			hostarch.Addr(ring0.KernelStartAddress|pr.virtual),
			pr.length,
			pagetables.MapOpts{AccessType: hostarch.AnyAccess, Global: true},
			pr.physical)

		return true // Keep iterating.
	})
}

// rdonlyRegionsForSetMem returns all read-only physicalRegions.
func rdonlyRegionsForSetMem() (phyRegions []physicalRegion) {
	var rdonlyRegions []region

	applyVirtualRegions(func(vr virtualRegion) {
		if excludeVirtualRegion(vr) {
			return
		}

		if !vr.accessType.Write && vr.accessType.Read {
			rdonlyRegions = append(rdonlyRegions, vr.region)
		}

		// TODO(github.com/SagerNet/issue/2686): PROT_NONE should be specially treated.
		// Workaround: treat as read-only temporarily.
		if !vr.accessType.Write && !vr.accessType.Read && !vr.accessType.Execute {
			rdonlyRegions = append(rdonlyRegions, vr.region)
		}
	})

	for _, r := range rdonlyRegions {
		physical, _, ok := translateToPhysical(r.virtual)
		if !ok {
			continue
		}

		phyRegions = append(phyRegions, physicalRegion{
			region: region{
				virtual: r.virtual,
				length:  r.length,
			},
			physical: physical,
		})
	}

	return phyRegions
}
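// kernelAlias is a hypothetical helper, added purely as an illustration and
// not used by this package, sketching the address arithmetic behind
// mapUpperHalf above: ORing a lower-half virtual address with
// ring0.KernelStartAddress, whose high bits are all set, yields the
// upper-half alias at which the same physical memory is mapped into the
// kernel's half of the address space.
func kernelAlias(virtual uintptr) hostarch.Addr {
	return hostarch.Addr(ring0.KernelStartAddress | virtual)
}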
// availableRegionsForSetMem returns all available physicalRegions, computed
// by excluding every non-writable virtual region.
func availableRegionsForSetMem() (phyRegions []physicalRegion) {
	var excludeRegions []region
	applyVirtualRegions(func(vr virtualRegion) {
		if !vr.accessType.Write {
			excludeRegions = append(excludeRegions, vr.region)
		}
	})

	phyRegions = computePhysicalRegions(excludeRegions)

	return phyRegions
}

// nonCanonical generates the fault return for a non-canonical address.
//
//go:nosplit
func nonCanonical(addr uint64, signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
	*info = linux.SignalInfo{
		Signo: signal,
		Code:  linux.SI_KERNEL,
	}
	info.SetAddr(addr) // Include address.
	return hostarch.NoAccess, platform.ErrContextSignal
}

// isInstructionAbort returns true if the ESR value indicates an instruction
// abort from a lower exception level.
//
//go:nosplit
func isInstructionAbort(code uint64) bool {
	value := (code & _ESR_ELx_EC_MASK) >> _ESR_ELx_EC_SHIFT
	return value == _ESR_ELx_EC_IABT_LOW
}

// isWriteFault returns whether the ESR value indicates a write fault.
//
//go:nosplit
func isWriteFault(code uint64) bool {
	if isInstructionAbort(code) {
		return false
	}

	return (code & _ESR_ELx_WNR) != 0
}

// fault generates an appropriate fault return.
//
//go:nosplit
func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType, error) {
	bluepill(c) // Probably no-op, but may not be.
	faultAddr := c.GetFaultAddr()
	code, user := c.ErrorCode()

	if !user {
		// The last fault serviced by this CPU was not a user
		// fault, so we can't reliably trust the faultAddr or
		// the code provided here. We need to re-execute.
		return hostarch.NoAccess, platform.ErrContextInterrupt
	}

	// Reset the pointed SignalInfo.
	*info = linux.SignalInfo{Signo: signal}
	info.SetAddr(uint64(faultAddr))

	ret := code & _ESR_ELx_FSC
	switch ret {
	case _ESR_SEGV_MAPERR_L0, _ESR_SEGV_MAPERR_L1, _ESR_SEGV_MAPERR_L2, _ESR_SEGV_MAPERR_L3:
		info.Code = 1 // SEGV_MAPERR.
	case _ESR_SEGV_ACCERR_L1, _ESR_SEGV_ACCERR_L2, _ESR_SEGV_ACCERR_L3, _ESR_SEGV_PEMERR_L1, _ESR_SEGV_PEMERR_L2, _ESR_SEGV_PEMERR_L3:
		info.Code = 2 // SEGV_ACCERR.
	default:
		info.Code = 2 // Default to SEGV_ACCERR.
	}

	accessType := hostarch.AccessType{
		Read:    !isWriteFault(uint64(code)),
		Write:   isWriteFault(uint64(code)),
		Execute: isInstructionAbort(uint64(code)),
	}

	return accessType, platform.ErrContextSignal
}

// getMaxVCPU sets m.maxVCPUs to the smaller of the number of host CPUs and
// the vCPU limit reported by the KVM_CAP_MAX_VCPUS capability.
func (m *machine) getMaxVCPU() {
	rmaxVCPUs := runtime.NumCPU()
	smaxVCPUs, _, errno := unix.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
	if errno != 0 {
		// The capability check failed; fall back to the runtime value.
		m.maxVCPUs = rmaxVCPUs
	} else if rmaxVCPUs < int(smaxVCPUs) {
		m.maxVCPUs = rmaxVCPUs
	} else {
		m.maxVCPUs = int(smaxVCPUs)
	}
}

// getNewVCPU scans initialvCPUs for an available vCPU, claims it, and removes
// it from the pool. It returns nil if no vCPU is available.
func (m *machine) getNewVCPU() *vCPU {
	for cid, c := range m.initialvCPUs {
		if atomic.CompareAndSwapUint32(&c.state, vCPUReady, vCPUUser) {
			delete(m.initialvCPUs, cid)
			return c
		}
	}
	return nil
}
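// esrExample is a hypothetical helper, added purely as an illustration and
// not used by this package, sketching how the ESR_EL1 syndrome helpers above
// classify faults. It assumes only the EC/WnR bit layout already encoded by
// isInstructionAbort and isWriteFault: an instruction abort is never treated
// as a write fault, while any non-IABT syndrome with the WnR bit set is.
func esrExample() (execFault, writeFault bool) {
	// An instruction-abort syndrome: EC = IABT_LOW, shifted into the
	// exception-class field.
	iabt := uint64(_ESR_ELx_EC_IABT_LOW) << _ESR_ELx_EC_SHIFT
	// A write-fault syndrome: the WnR (write-not-read) bit set, with an
	// exception class other than IABT_LOW.
	wnr := uint64(_ESR_ELx_WNR)
	return isInstructionAbort(iabt), isWriteFault(wnr) // Both true.
}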